diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,247528 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6987876035079138, + "eval_steps": 500, + "global_step": 2500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0, + "logps_train/chosen": -13.803085327148438, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -11.196195602416992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01969146728515625, + "rewards_train/margins": 0.02681102743372321, + "rewards_train/rejected": -0.007119560148566961, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -80.23464965820312, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -121.24168395996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02346496656537056, + "rewards_train/margins": -0.04929657094180584, + "rewards_train/rejected": 0.02583160437643528, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -3.3026418685913086, + "logps_train/ref_chosen": -3.328125, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -3.7183098793029785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002548313234001398, + "rewards_train/margins": 0.0009418011177331209, + "rewards_train/rejected": 0.0016065121162682772, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -173.25656127929688, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -135.61160278320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02565612830221653, + "rewards_train/margins": 0.03550414927303791, + "rewards_train/rejected": -0.06116027757525444, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -3.267651319503784, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -12.15180778503418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00489013222977519, + "rewards_train/margins": -0.014709353912621737, + "rewards_train/rejected": 0.009819221682846546, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -149.8172607421875, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -132.38787841796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01827392540872097, + "rewards_train/margins": -0.04293823428452015, + "rewards_train/rejected": 0.06121215969324112, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -92.09083557128906, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -117.85350036621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009083556942641735, + "rewards_train/margins": 0.026266478933393955, + "rewards_train/rejected": -0.03535003587603569, + "step": 0 + }, + { + "epoch": 0, + "logps_train/chosen": -132.36911010742188, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -130.66387939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06308899074792862, + "rewards_train/margins": 0.02947692945599556, + "rewards_train/rejected": 0.03361206129193306, + "step": 0 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.294323921203613, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -3.753222703933716, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004942608065903187, + "rewards_train/margins": 0.014639878645539284, + "rewards_train/rejected": -0.009697270579636097, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -84.53573608398438, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -74.44483947753906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0035736083518713713, + "rewards_train/margins": -0.009089660597965121, + "rewards_train/rejected": 0.00551605224609375, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -2.393968105316162, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -2.868701696395874, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002790689468383789, + "rewards_train/margins": -0.004089140798896551, + "rewards_train/rejected": 0.00687983026728034, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -14.290733337402344, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -9.809741020202637, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008426666259765625, + "rewards_train/margins": -0.004349231719970703, + "rewards_train/rejected": 0.012775897979736328, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -46.53409194946289, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -41.908111572265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0034091949928551912, + "rewards_train/margins": -0.012598037952557206, + "rewards_train/rejected": 0.009188842959702015, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -4.394209861755371, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -6.39879846572876, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004329014103859663, + "rewards_train/margins": 0.01920886104926467, + "rewards_train/rejected": -0.014879846945405006, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -128.67457580566406, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -147.18020629882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03254241868853569, + "rewards_train/margins": 0.05056304857134819, + "rewards_train/rejected": -0.0180206298828125, + "step": 1 + }, + { + "epoch": 0.0, + "logps_train/chosen": -22.330501556396484, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -46.07271957397461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008050155825912952, + "rewards_train/margins": -0.0007781982421875, + "rewards_train/rejected": -0.007271957583725452, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 3.2e-08, + "loss": 0.6911, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": 0.0, + "logps_train/ref_chosen": 0.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -92.4157943725586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0, + "rewards_train/margins": -0.10842056572437286, + "rewards_train/rejected": 0.10842056572437286, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -6.010020732879639, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -1.6640625, + "logps_train/rejected": -1.6754021644592285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011497926898300648, + "rewards_train/margins": 0.012631893390789628, + "rewards_train/rejected": -0.0011339664924889803, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -98.36221313476562, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -89.97427368164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03622131422162056, + "rewards_train/margins": -0.08879394829273224, + "rewards_train/rejected": 0.05257263407111168, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -117.21041870117188, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -9.164359092712402, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02895813062787056, + "rewards_train/margins": 0.026644039899110794, + "rewards_train/rejected": 0.0023140907287597656, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -3.198298215866089, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -5.4128193855285645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003607678459957242, + "rewards_train/margins": -0.0019853829871863127, + "rewards_train/rejected": 0.005593061447143555, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -116.11625671386719, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -116.34332275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03837433084845543, + "rewards_train/margins": 0.02270660549402237, + "rewards_train/rejected": 0.01566772535443306, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -7.011392116546631, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -27.58001708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0011392117012292147, + "rewards_train/margins": 0.006862497655674815, + "rewards_train/rejected": -0.00800170935690403, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -91.3703384399414, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -76.22172546386719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.037033844739198685, + "rewards_train/margins": -0.014861298725008965, + "rewards_train/rejected": -0.02217254601418972, + "step": 2 + }, + { + "epoch": 0.0, + "logps_train/chosen": -90.89939880371094, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -129.98529052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06006012111902237, + "rewards_train/margins": 0.058589173830114305, + "rewards_train/rejected": 0.0014709472889080644, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -101.91035461425781, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -98.77587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00896453857421875, + "rewards_train/margins": 0.08655243366956711, + "rewards_train/rejected": -0.07758789509534836, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -83.06622314453125, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -173.9846649169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04337768629193306, + "rewards_train/margins": 0.14184417948126793, + "rewards_train/rejected": -0.09846649318933487, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -2.894551992416382, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -1.15625, + "logps_train/rejected": -1.1290661096572876, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015232301317155361, + "rewards_train/margins": 0.012513912282884121, + "rewards_train/rejected": 0.0027183890342712402, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -110.49559020996094, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -120.32583618164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04955902323126793, + "rewards_train/margins": -0.01697540283203125, + "rewards_train/rejected": -0.03258362039923668, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -139.16519165039062, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -114.92196655273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01651916466653347, + "rewards_train/margins": -0.024322509299963713, + "rewards_train/rejected": 0.0078033446334302425, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -1.4318901300430298, + "logps_train/ref_chosen": -1.6171875, + "logps_train/ref_rejected": -1.6171875, + "logps_train/rejected": -1.7104541063308716, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01852973736822605, + "rewards_train/margins": 0.02785639837384224, + "rewards_train/rejected": -0.009326661005616188, + "step": 3 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.121835231781006, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -15.188529968261719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015308523550629616, + "rewards_train/margins": -0.01520552672445774, + "rewards_train/rejected": -0.000102996826171875, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 6.4e-08, + "loss": 0.6891, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -3.547455310821533, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -4.586217403411865, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.80310843361076e-05, + "rewards_train/margins": -0.01331129055688507, + "rewards_train/rejected": 0.013253259472548962, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -17.6850643157959, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -21.01703453063965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018993569537997246, + "rewards_train/margins": 0.020697022671811283, + "rewards_train/rejected": -0.0017034531338140368, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -0.7039369344711304, + "logps_train/ref_chosen": -0.6796875, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -4.088665008544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.002424943493679166, + "rewards_train/margins": -0.0029334426508285105, + "rewards_train/rejected": 0.0005084991571493447, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -36.87697982788086, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -9.688961029052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012302017770707607, + "rewards_train/margins": 0.012448120673070662, + "rewards_train/rejected": -0.00014610290236305445, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -1.5388572216033936, + "logps_train/ref_chosen": -1.609375, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -2.3408689498901367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0070517780259251595, + "rewards_train/margins": 0.006763673009118065, + "rewards_train/rejected": 0.0002881050168070942, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -68.2456283569336, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -81.55203247070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.024562835693359375, + "rewards_train/margins": 0.030640412122011185, + "rewards_train/rejected": -0.05520324781537056, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -6.085680961608887, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -7.357687950134277, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011693096719682217, + "rewards_train/margins": -0.013424301752820611, + "rewards_train/rejected": 0.0017312050331383944, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -17.117542266845703, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -3.243626117706299, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011754226870834827, + "rewards_train/margins": -0.007704115007072687, + "rewards_train/rejected": -0.00405011186376214, + "step": 4 + }, + { + "epoch": 0.0, + "logps_train/chosen": -17.581058502197266, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -20.127044677734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008105850778520107, + "rewards_train/margins": -0.020401382818818092, + "rewards_train/rejected": 0.012295532040297985, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -73.1087417602539, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -99.67393493652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.060874175280332565, + "rewards_train/margins": -0.093480683863163, + "rewards_train/rejected": 0.03260650858283043, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -9.904411315917969, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -5.368002891540527, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00955886859446764, + "rewards_train/margins": 0.002609157469123602, + "rewards_train/rejected": 0.006949711125344038, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -0.5071470141410828, + "logps_train/ref_chosen": -0.494140625, + "logps_train/ref_rejected": -0.494140625, + "logps_train/rejected": -0.4667227566242218, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.001300638890825212, + "rewards_train/margins": -0.0040424257749691606, + "rewards_train/rejected": 0.0027417868841439486, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -103.36321258544922, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -70.89925384521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01367874164134264, + "rewards_train/margins": 0.003604126162827015, + "rewards_train/rejected": 0.010074615478515625, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -12.646930694580078, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -0.322265625, + "logps_train/rejected": -0.2876869738101959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016556931659579277, + "rewards_train/margins": 0.013099066447466612, + "rewards_train/rejected": 0.003457865212112665, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -13.1285400390625, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -25.96734046936035, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02535400353372097, + "rewards_train/margins": -0.01611995603889227, + "rewards_train/rejected": -0.009234047494828701, + "step": 5 + }, + { + "epoch": 0.0, + "logps_train/chosen": -1.5687675476074219, + "logps_train/ref_chosen": -1.53125, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -10.859012603759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0037517547607421875, + "rewards_train/margins": -0.01785049494355917, + "rewards_train/rejected": 0.014098740182816982, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 9.6e-08, + "loss": 0.6965, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -20.607295989990234, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -4.461714267730713, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0017704010242596269, + "rewards_train/margins": 0.001066827797330916, + "rewards_train/rejected": 0.0007035732269287109, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -28.434589385986328, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -7.8914794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00654106168076396, + "rewards_train/margins": 0.02381401089951396, + "rewards_train/rejected": -0.01727294921875, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -147.01898193359375, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -189.36825561523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0018981933826580644, + "rewards_train/margins": 0.034927370375953615, + "rewards_train/rejected": -0.03682556375861168, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -16.194332122802734, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -10.99102783203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.019433213397860527, + "rewards_train/margins": -0.001580430194735527, + "rewards_train/rejected": -0.017852783203125, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.896612644195557, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -31.29709243774414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0052862646989524364, + "rewards_train/margins": -0.000577020924538374, + "rewards_train/rejected": -0.0047092437744140625, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -103.71453857421875, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -122.2554702758789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021453857421875, + "rewards_train/margins": 0.004093170166015625, + "rewards_train/rejected": -0.025547027587890625, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -14.637649536132812, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -6.159282684326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02001495473086834, + "rewards_train/margins": 0.005288314074277878, + "rewards_train/rejected": -0.025303268805146217, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -58.792694091796875, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -88.46823120117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02926941029727459, + "rewards_train/margins": 0.01755371131002903, + "rewards_train/rejected": -0.04682312160730362, + "step": 6 + }, + { + "epoch": 0.0, + "logps_train/chosen": -173.48117065429688, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -177.265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04811706766486168, + "rewards_train/margins": -0.02155456691980362, + "rewards_train/rejected": -0.02656250074505806, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -10.257511138916016, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -9.484679222106934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.000751113926526159, + "rewards_train/margins": 0.01646680902922526, + "rewards_train/rejected": -0.01721792295575142, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -49.846134185791016, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -49.7490234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.015386581420898438, + "rewards_train/margins": -0.009711075574159622, + "rewards_train/rejected": 0.02509765699505806, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -105.93916320800781, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -124.24298095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006083679385483265, + "rewards_train/margins": 0.030381775461137295, + "rewards_train/rejected": -0.02429809607565403, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -131.2230987548828, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -135.122314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02230987511575222, + "rewards_train/margins": -0.11007842980325222, + "rewards_train/rejected": 0.0877685546875, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -9.188741683959961, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -16.25306510925293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006374168675392866, + "rewards_train/margins": -0.018567657563835382, + "rewards_train/rejected": 0.012193488888442516, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -74.52889251708984, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -33.50979995727539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.052889253944158554, + "rewards_train/margins": -0.07690925896167755, + "rewards_train/rejected": 0.024020005017518997, + "step": 7 + }, + { + "epoch": 0.0, + "logps_train/chosen": -105.2614974975586, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -138.11231994628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023850250989198685, + "rewards_train/margins": 0.13508224859833717, + "rewards_train/rejected": -0.11123199760913849, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 1.28e-07, + "loss": 0.6927, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -108.22491455078125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -128.80174255371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07750854641199112, + "rewards_train/margins": 0.05768280103802681, + "rewards_train/rejected": 0.01982574537396431, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -4.022251129150391, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -7.589479446411133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007149887271225452, + "rewards_train/margins": 0.003597831819206476, + "rewards_train/rejected": 0.003552055452018976, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -162.3518829345703, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -179.96109008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03518829494714737, + "rewards_train/margins": 0.06092071533203125, + "rewards_train/rejected": -0.09610901027917862, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -136.99331665039062, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -143.55612182617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0006683349492959678, + "rewards_train/margins": 0.05628051905659959, + "rewards_train/rejected": -0.05561218410730362, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -0.15606704354286194, + "logps_train/ref_chosen": -0.15625, + "logps_train/ref_rejected": -0.15625, + "logps_train/rejected": -0.1557483673095703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 1.8295646441401914e-05, + "rewards_train/margins": -3.186762478435412e-05, + "rewards_train/rejected": 5.0163271225756034e-05, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -172.82183837890625, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -205.21009826660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01781616173684597, + "rewards_train/margins": 0.03882598876953125, + "rewards_train/rejected": -0.02100982703268528, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -101.36676788330078, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -129.44992065429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.036676790565252304, + "rewards_train/margins": -0.0916847251355648, + "rewards_train/rejected": 0.0550079345703125, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -9.328425407409668, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -11.450916290283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023407459259033203, + "rewards_train/margins": 0.024749088333919644, + "rewards_train/rejected": -0.0013416290748864412, + "step": 8 + }, + { + "epoch": 0.0, + "logps_train/chosen": -81.06945037841797, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -105.02044677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0069450377486646175, + "rewards_train/margins": 0.04509964073076844, + "rewards_train/rejected": -0.05204467847943306, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -8.37950325012207, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -12.608146667480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00045032502384856343, + "rewards_train/margins": -0.008385658089537174, + "rewards_train/rejected": 0.00793533306568861, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -23.306652069091797, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -6.231266975402832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018165206536650658, + "rewards_train/margins": -0.010663508903235197, + "rewards_train/rejected": -0.007501697633415461, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -28.07453727722168, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -18.514867782592773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007453728001564741, + "rewards_train/margins": 0.03153305174782872, + "rewards_train/rejected": -0.03898677974939346, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.225481033325195, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -18.994707107543945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006923103239387274, + "rewards_train/margins": 0.005047607701271772, + "rewards_train/rejected": -0.011970710940659046, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -20.312286376953125, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -14.666935920715332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01877136342227459, + "rewards_train/margins": 0.03546495549380779, + "rewards_train/rejected": -0.016693592071533203, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -12.621526718139648, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -24.365337371826172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012152671813964844, + "rewards_train/margins": -0.013118934642989188, + "rewards_train/rejected": 0.0009662628290243447, + "step": 9 + }, + { + "epoch": 0.0, + "logps_train/chosen": -10.27659797668457, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -9.839640617370605, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003590202424675226, + "rewards_train/margins": -0.006195735651999712, + "rewards_train/rejected": 0.009785938076674938, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 1.6e-07, + "loss": 0.6862, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -0.6419546604156494, + "logps_train/ref_chosen": -0.58984375, + "logps_train/ref_rejected": -0.58984375, + "logps_train/rejected": -0.6401293277740479, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005211091134697199, + "rewards_train/margins": -0.0001825331710278988, + "rewards_train/rejected": -0.0050285579636693, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -203.25872802734375, + "logps_train/ref_chosen": -203.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -176.56118774414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02587280236184597, + "rewards_train/margins": 0.03024597279727459, + "rewards_train/rejected": -0.05611877515912056, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -87.3477783203125, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -137.41366577148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.015222168527543545, + "rewards_train/margins": -0.043411255814135075, + "rewards_train/rejected": 0.05863342434167862, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -12.528746604919434, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -2.1721277236938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0033753395546227694, + "rewards_train/margins": 0.0018381119007244706, + "rewards_train/rejected": 0.0015372276538982987, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -131.96060180664062, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -100.24911499023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003939819522202015, + "rewards_train/margins": -0.021148682571947575, + "rewards_train/rejected": 0.02508850209414959, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.290165424346924, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -27.773571014404297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0008915424696169794, + "rewards_train/margins": -0.011034441587980837, + "rewards_train/rejected": 0.010142899118363857, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -4.399106502532959, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -10.571213722229004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003839349839836359, + "rewards_train/margins": 0.02971072169020772, + "rewards_train/rejected": -0.02587137185037136, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -134.84225463867188, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -117.01587677001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01577453687787056, + "rewards_train/margins": 0.0673622153699398, + "rewards_train/rejected": -0.051587678492069244, + "step": 10 + }, + { + "epoch": 0.0, + "logps_train/chosen": -120.3760757446289, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -93.56324005126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.062392424792051315, + "rewards_train/margins": 0.06871643010526896, + "rewards_train/rejected": -0.00632400531321764, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -2.6490936279296875, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -15.21127700805664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.002409362932667136, + "rewards_train/margins": -0.012531662127003074, + "rewards_train/rejected": 0.010122299194335938, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -34.58559799194336, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -19.043758392333984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03355979919433594, + "rewards_train/margins": -0.029183960054069757, + "rewards_train/rejected": -0.00437583914026618, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -89.32051086425781, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -108.78838348388672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01794891431927681, + "rewards_train/margins": -0.0032127369195222855, + "rewards_train/rejected": 0.021161651238799095, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -4.342546463012695, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -2.6761634349823, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00012035370309604332, + "rewards_train/margins": 0.005236697201326024, + "rewards_train/rejected": -0.0051163434982299805, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -80.10397338867188, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -107.85597229003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01039733923971653, + "rewards_train/margins": 0.0251998919993639, + "rewards_train/rejected": -0.03559723123908043, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -73.38087463378906, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -76.04228210449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011912536807358265, + "rewards_train/margins": 0.016140747349709272, + "rewards_train/rejected": -0.0042282105423510075, + "step": 11 + }, + { + "epoch": 0.0, + "logps_train/chosen": -90.06268310546875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -119.3911361694336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006268310826271772, + "rewards_train/margins": -0.0671546938829124, + "rewards_train/rejected": 0.060886383056640625, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 1.92e-07, + "loss": 0.6916, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -1.4377089738845825, + "logps_train/ref_chosen": -1.453125, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -3.1492443084716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0015416026581078768, + "rewards_train/margins": -0.0007214664947241545, + "rewards_train/rejected": 0.0022630691528320312, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -7.737253665924072, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -6.872251987457275, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020024633035063744, + "rewards_train/margins": 0.02287483192048967, + "rewards_train/rejected": -0.0028501988854259253, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -84.52112579345703, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -112.53483581542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0021125792991369963, + "rewards_train/margins": 0.0013710022903978825, + "rewards_train/rejected": -0.0034835815895348787, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -88.50775146484375, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -99.44091796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0007751464727334678, + "rewards_train/margins": -0.05668334959773347, + "rewards_train/rejected": 0.055908203125, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -88.21830749511719, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -73.88739013671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02183075062930584, + "rewards_train/margins": -0.03309173695743084, + "rewards_train/rejected": 0.011260986328125, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.212318420410156, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -9.549352645874023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0037681579124182463, + "rewards_train/margins": 0.00870342249982059, + "rewards_train/rejected": -0.004935264587402344, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -14.776863098144531, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -8.998406410217285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008936310186982155, + "rewards_train/margins": -0.0028456691652536392, + "rewards_train/rejected": -0.006090641021728516, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -78.25521850585938, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -105.88114166259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02552185021340847, + "rewards_train/margins": -0.03740768413990736, + "rewards_train/rejected": 0.01188583392649889, + "step": 12 + }, + { + "epoch": 0.0, + "logps_train/chosen": -111.3990478515625, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -127.24703979492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.010095215402543545, + "rewards_train/margins": -0.015200805850327015, + "rewards_train/rejected": 0.02529602125287056, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -25.5622501373291, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -12.115472793579102, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006274986546486616, + "rewards_train/margins": -0.013427734840661287, + "rewards_train/rejected": 0.019702721387147903, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -90.66697692871094, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.64388275146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03330230712890625, + "rewards_train/margins": 0.04769058246165514, + "rewards_train/rejected": -0.01438827533274889, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -109.85852813720703, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -83.85087585449219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.014147186651825905, + "rewards_train/margins": -0.0007652277126908302, + "rewards_train/rejected": 0.014912414364516735, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -153.5693359375, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -137.25152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04306640848517418, + "rewards_train/margins": 0.06821899674832821, + "rewards_train/rejected": -0.02515258826315403, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -9.043453216552734, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -9.003278732299805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010595321655273438, + "rewards_train/margins": -0.01026744840783067, + "rewards_train/rejected": -0.000327873247442767, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -2.0766053199768066, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -4.048585891723633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00796446856111288, + "rewards_train/margins": 0.01282305782660842, + "rewards_train/rejected": -0.004858589265495539, + "step": 13 + }, + { + "epoch": 0.0, + "logps_train/chosen": -17.922595977783203, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -9.44135570526123, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03274040296673775, + "rewards_train/margins": 0.020625973120331764, + "rewards_train/rejected": 0.012114429846405983, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 2.24e-07, + "loss": 0.6929, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -4.486982822418213, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -5.152585029602051, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004426717758178711, + "rewards_train/margins": 0.004060220700921491, + "rewards_train/rejected": 0.00036649705725722015, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -5.943935394287109, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -6.0257368087768555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005606460850685835, + "rewards_train/margins": 0.008180141681805253, + "rewards_train/rejected": -0.002573680831119418, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -136.58615112304688, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -103.51517486572266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05861511453986168, + "rewards_train/margins": -0.10709762945771217, + "rewards_train/rejected": 0.048482514917850494, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -74.49932861328125, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -77.87213134765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04993286356329918, + "rewards_train/margins": -0.01271972805261612, + "rewards_train/rejected": -0.03721313551068306, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -116.23719787597656, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -92.67724609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02371978759765625, + "rewards_train/margins": -0.00599517859518528, + "rewards_train/rejected": -0.01772460900247097, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -13.628402709960938, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -19.069299697875977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0003402710135560483, + "rewards_train/margins": 0.0065896988671738654, + "rewards_train/rejected": -0.006929969880729914, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -164.1067657470703, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -174.99758911132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010676574893295765, + "rewards_train/margins": -0.010917663763393648, + "rewards_train/rejected": 0.00024108887009788305, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -35.780704498291016, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -9.210390090942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00307044992223382, + "rewards_train/margins": 0.011718559544533491, + "rewards_train/rejected": -0.014789009466767311, + "step": 14 + }, + { + "epoch": 0.0, + "logps_train/chosen": -11.1250638961792, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -24.869232177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.3896181927702855e-06, + "rewards_train/margins": 0.03691682890030279, + "rewards_train/rejected": -0.03692321851849556, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -6.899374485015869, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -6.561091899871826, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0024374485947191715, + "rewards_train/margins": -0.005703258560970426, + "rewards_train/rejected": 0.003265809966251254, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -150.5303192138672, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -91.92230224609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04696808010339737, + "rewards_train/margins": -0.01080169528722763, + "rewards_train/rejected": 0.057769775390625, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -1.4820587635040283, + "logps_train/ref_chosen": -1.5, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -9.417102813720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0017941236728802323, + "rewards_train/margins": 0.006004405324347317, + "rewards_train/rejected": -0.004210281651467085, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -4.481874465942383, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -12.105172157287598, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004437446594238281, + "rewards_train/margins": 0.006079769693315029, + "rewards_train/rejected": -0.01051721628755331, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -15.198909759521484, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -19.584569931030273, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.026140976697206497, + "rewards_train/margins": -0.042683983221650124, + "rewards_train/rejected": 0.016543006524443626, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -74.28204345703125, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -126.14984893798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02179565466940403, + "rewards_train/margins": 0.08678054995834827, + "rewards_train/rejected": -0.06498489528894424, + "step": 15 + }, + { + "epoch": 0.0, + "logps_train/chosen": -16.613386154174805, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -3.7691783905029297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011338615790009499, + "rewards_train/margins": -0.012545776786282659, + "rewards_train/rejected": 0.00120716099627316, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2.56e-07, + "loss": 0.6944, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -110.79779052734375, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -113.46686553955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02977905236184597, + "rewards_train/margins": 0.016907503828406334, + "rewards_train/rejected": -0.046686556190252304, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -9.911771774291992, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -4.9177021980285645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0036771774757653475, + "rewards_train/margins": -0.0025319576961919665, + "rewards_train/rejected": -0.001145219779573381, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -150.19644165039062, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -151.48968505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08035583794116974, + "rewards_train/margins": 0.12932434305548668, + "rewards_train/rejected": -0.04896850511431694, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -139.82028198242188, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -124.21117401123047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08202820271253586, + "rewards_train/margins": -0.060910800471901894, + "rewards_train/rejected": -0.021117402240633965, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -1.6019182205200195, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -1.390625, + "logps_train/rejected": -1.3867329359054565, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004723072052001953, + "rewards_train/margins": -0.005112278478918597, + "rewards_train/rejected": 0.000389206426916644, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -66.14653015136719, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -55.069740295410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03534698486328125, + "rewards_train/margins": 0.017321014776825905, + "rewards_train/rejected": 0.018025970086455345, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -142.3668212890625, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -112.41207885742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06331787258386612, + "rewards_train/margins": 0.054525758139789104, + "rewards_train/rejected": 0.008792114444077015, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -44.99235534667969, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -16.04937171936035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007644653669558465, + "rewards_train/margins": 0.005701637302991003, + "rewards_train/rejected": -0.004937171936035156, + "step": 16 + }, + { + "epoch": 0.0, + "logps_train/chosen": -41.47843933105469, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -62.55552291870117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02284393273293972, + "rewards_train/margins": -0.06729164160788059, + "rewards_train/rejected": 0.04444770887494087, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -109.66050720214844, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -109.7200927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03394928202033043, + "rewards_train/margins": 0.05595855973660946, + "rewards_train/rejected": -0.02200927771627903, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -0.4862343370914459, + "logps_train/ref_chosen": -0.490234375, + "logps_train/ref_rejected": -0.490234375, + "logps_train/rejected": -0.4909893572330475, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0004000038024969399, + "rewards_train/margins": 0.00047550202725687996, + "rewards_train/rejected": -7.549822475994006e-05, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -75.81597137451172, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -78.14228820800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018402863293886185, + "rewards_train/margins": 0.032631684094667435, + "rewards_train/rejected": -0.01422882080078125, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -115.05105590820312, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -121.18283081054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04489440843462944, + "rewards_train/margins": 0.11317748948931694, + "rewards_train/rejected": -0.0682830810546875, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -109.4424057006836, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -132.7696990966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00575943011790514, + "rewards_train/margins": -0.01727066095918417, + "rewards_train/rejected": 0.02303009107708931, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -11.136351585388184, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -2.780308485031128, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.001135158585384488, + "rewards_train/margins": -0.00904181064106524, + "rewards_train/rejected": 0.007906652055680752, + "step": 17 + }, + { + "epoch": 0.0, + "logps_train/chosen": -10.570751190185547, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -11.028528213500977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005424880888313055, + "rewards_train/margins": 0.014527702238410711, + "rewards_train/rejected": -0.009102821350097656, + "step": 17 + }, + { + "epoch": 0.01, + "learning_rate": 2.88e-07, + "loss": 0.6849, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -1.8400323390960693, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -1.9375, + "logps_train/rejected": -1.859066128730774, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009746766649186611, + "rewards_train/margins": 0.0019033793359994888, + "rewards_train/rejected": 0.007843387313187122, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -11.135076522827148, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -38.488243103027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.026007652282714844, + "rewards_train/margins": -0.0021833423525094986, + "rewards_train/rejected": -0.023824309930205345, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -137.93698120117188, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -129.9855499267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0063018798828125, + "rewards_train/margins": 0.10485687106847763, + "rewards_train/rejected": -0.09855499118566513, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -167.58822631835938, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -150.26654052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04117736965417862, + "rewards_train/margins": 0.06783142313361168, + "rewards_train/rejected": -0.02665405347943306, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -106.94757843017578, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -116.33006286621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.055242158472537994, + "rewards_train/margins": -0.011751554906368256, + "rewards_train/rejected": 0.06699371337890625, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -20.924142837524414, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -6.418634414672852, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.029914284124970436, + "rewards_train/margins": -0.028675842680968344, + "rewards_train/rejected": -0.0012384414440020919, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -39.19263458251953, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -30.896373748779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00573654193431139, + "rewards_train/margins": 0.020373917184770107, + "rewards_train/rejected": -0.014637375250458717, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -63.532196044921875, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -63.97895812988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04678039625287056, + "rewards_train/margins": 0.044676209101453424, + "rewards_train/rejected": 0.002104187151417136, + "step": 18 + }, + { + "epoch": 0.01, + "logps_train/chosen": -93.81849670410156, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -99.351318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01815032958984375, + "rewards_train/margins": 0.00328216515481472, + "rewards_train/rejected": 0.01486816443502903, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -66.8671875, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -23.009653091430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01328125037252903, + "rewards_train/margins": 0.014246559527236968, + "rewards_train/rejected": -0.0009653091547079384, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -162.10931396484375, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -147.56448364257812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.110931396484375, + "rewards_train/margins": -0.05448302999138832, + "rewards_train/rejected": -0.05644836649298668, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -72.88098907470703, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -44.525634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01190109271556139, + "rewards_train/margins": 0.01446456927806139, + "rewards_train/rejected": -0.0025634765625, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -65.63883972167969, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -98.56526947021484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01388397254049778, + "rewards_train/margins": -0.007357025519013405, + "rewards_train/rejected": -0.006526947021484375, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -48.862422943115234, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -17.727476119995117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011242294684052467, + "rewards_train/margins": -0.013494682731106877, + "rewards_train/rejected": 0.00225238804705441, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -107.01524353027344, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -118.2667236328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05152435228228569, + "rewards_train/margins": -0.02485198900103569, + "rewards_train/rejected": -0.02667236328125, + "step": 19 + }, + { + "epoch": 0.01, + "logps_train/chosen": -113.54324340820312, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -133.53811645507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0043243407271802425, + "rewards_train/margins": -0.05051269521936774, + "rewards_train/rejected": 0.0461883544921875, + "step": 19 + }, + { + "epoch": 0.01, + "learning_rate": 3.2e-07, + "loss": 0.6908, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -83.07501220703125, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -77.32459259033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04249877855181694, + "rewards_train/margins": 0.024958036839962006, + "rewards_train/rejected": 0.017540741711854935, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -51.78623962402344, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -63.211666107177734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02862396277487278, + "rewards_train/margins": -0.0324573521502316, + "rewards_train/rejected": 0.00383338937535882, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -56.50689697265625, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -64.8316879272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02568969689309597, + "rewards_train/margins": 0.007479095831513405, + "rewards_train/rejected": -0.033168792724609375, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -156.77587890625, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -162.7723388671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02241211012005806, + "rewards_train/margins": -0.0003540031611919403, + "rewards_train/rejected": 0.02276611328125, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -10.109704971313477, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -53.97415542602539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004720497410744429, + "rewards_train/margins": -0.007304954808205366, + "rewards_train/rejected": 0.0025844573974609375, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -60.73615264892578, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -90.5587158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026384735479950905, + "rewards_train/margins": 0.03225631779059768, + "rewards_train/rejected": -0.005871582310646772, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -89.26386260986328, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -93.83717346191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.023613739758729935, + "rewards_train/margins": -0.042668912559747696, + "rewards_train/rejected": 0.06628265231847763, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -13.787445068359375, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -11.676741600036621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0025054931174963713, + "rewards_train/margins": 0.02642965386621654, + "rewards_train/rejected": -0.02392416074872017, + "step": 20 + }, + { + "epoch": 0.01, + "logps_train/chosen": -3.7404861450195312, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -18.871219635009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00876388605684042, + "rewards_train/margins": 0.008385849563637748, + "rewards_train/rejected": 0.0003780364932026714, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -39.468589782714844, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -19.933704376220703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0031410218216478825, + "rewards_train/margins": -0.0034885406494140625, + "rewards_train/rejected": 0.006629562471061945, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -64.10682678222656, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -95.32923889160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06068268045783043, + "rewards_train/margins": -0.02775879204273224, + "rewards_train/rejected": -0.03292388841509819, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -51.881141662597656, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -87.40444946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01188583392649889, + "rewards_train/margins": 0.002330780029296875, + "rewards_train/rejected": 0.009555053897202015, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -98.77923583984375, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -183.5055694580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.027923583984375, + "rewards_train/margins": 0.02263336256146431, + "rewards_train/rejected": -0.05055694654583931, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -21.304059982299805, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -38.076133728027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01790599897503853, + "rewards_train/margins": -0.010292625986039639, + "rewards_train/rejected": -0.00761337298899889, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -48.24392318725586, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -62.82749557495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0006076812860555947, + "rewards_train/margins": 0.008357238781172782, + "rewards_train/rejected": -0.0077495574951171875, + "step": 21 + }, + { + "epoch": 0.01, + "logps_train/chosen": -0.7663556933403015, + "logps_train/ref_chosen": -0.7109375, + "logps_train/ref_rejected": -0.87109375, + "logps_train/rejected": -0.7346832752227783, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005541819613426924, + "rewards_train/margins": -0.019182866904884577, + "rewards_train/rejected": 0.013641047291457653, + "step": 21 + }, + { + "epoch": 0.01, + "learning_rate": 3.52e-07, + "loss": 0.6938, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -132.20077514648438, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -136.04263305664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02007751539349556, + "rewards_train/margins": -0.015814209822565317, + "rewards_train/rejected": -0.0042633055709302425, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -113.06689453125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -115.16545104980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09331054985523224, + "rewards_train/margins": 0.10985565558075905, + "rewards_train/rejected": -0.01654510572552681, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -114.02972412109375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -117.60737609863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.047027587890625, + "rewards_train/margins": 0.057765197940170765, + "rewards_train/rejected": -0.010737610049545765, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.5131354331970215, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -25.93484878540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008061456494033337, + "rewards_train/margins": 0.014046335127204657, + "rewards_train/rejected": -0.00598487863317132, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.050341606140137, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -1.578125, + "logps_train/rejected": -1.5890023708343506, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0012158394092693925, + "rewards_train/margins": 0.002303576562553644, + "rewards_train/rejected": -0.0010877371532842517, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -112.75489044189453, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -127.36808013916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024510955438017845, + "rewards_train/margins": 0.011318969540297985, + "rewards_train/rejected": 0.01319198589771986, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -63.259761810302734, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -61.29560852050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.000976181065198034, + "rewards_train/margins": 0.053584670240525156, + "rewards_train/rejected": -0.05456085130572319, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -17.262680053710938, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -11.195558547973633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013768005184829235, + "rewards_train/margins": -0.0004621502012014389, + "rewards_train/rejected": -0.013305854983627796, + "step": 22 + }, + { + "epoch": 0.01, + "logps_train/chosen": -31.18018341064453, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -49.407188415527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006981659214943647, + "rewards_train/margins": -0.0022994992323219776, + "rewards_train/rejected": 0.009281158447265625, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -119.12594604492188, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -103.64674377441406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01259460486471653, + "rewards_train/margins": -0.0479202289134264, + "rewards_train/rejected": 0.03532562404870987, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -128.4718780517578, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -117.55641174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05281219631433487, + "rewards_train/margins": 0.10845337063074112, + "rewards_train/rejected": -0.05564117431640625, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -148.29324340820312, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -123.24357604980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02932434156537056, + "rewards_train/margins": -0.05496673658490181, + "rewards_train/rejected": 0.02564239501953125, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -31.858383178710938, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -70.40930938720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010838317684829235, + "rewards_train/margins": -0.019907379522919655, + "rewards_train/rejected": 0.00906906183809042, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -90.81387329101562, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -125.70930480957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0313873291015625, + "rewards_train/margins": -0.01045684702694416, + "rewards_train/rejected": -0.02093048207461834, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -8.20071792602539, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -14.103254318237305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.004928207490593195, + "rewards_train/margins": -0.0034963604994118214, + "rewards_train/rejected": 0.008424567990005016, + "step": 23 + }, + { + "epoch": 0.01, + "logps_train/chosen": -9.026999473571777, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -17.644821166992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028550053015351295, + "rewards_train/margins": 0.01803216990083456, + "rewards_train/rejected": 0.010517883114516735, + "step": 23 + }, + { + "epoch": 0.01, + "learning_rate": 3.84e-07, + "loss": 0.6865, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.188457489013672, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -6.124053955078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.574890282237902e-05, + "rewards_train/margins": -0.0001903533993754536, + "rewards_train/rejected": 9.460449655307457e-05, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -5.202934741973877, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -10.572571754455566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010956525802612305, + "rewards_train/margins": 0.011963701224885881, + "rewards_train/rejected": -0.0010071754222735763, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -19.459453582763672, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -22.261363983154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008445358835160732, + "rewards_train/margins": 0.00519103929400444, + "rewards_train/rejected": -0.013636398129165173, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -21.199932098388672, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -2.5, + "logps_train/rejected": -2.5357275009155273, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007493210025131702, + "rewards_train/margins": -0.003920459887012839, + "rewards_train/rejected": -0.003572750138118863, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -9.05758285522461, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -65.64391326904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012991714291274548, + "rewards_train/margins": 0.027383041568100452, + "rewards_train/rejected": -0.014391327276825905, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -9.770036697387695, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -6.3695292472839355, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008253670297563076, + "rewards_train/margins": -0.011925745522603393, + "rewards_train/rejected": 0.0036720752250403166, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -120.60368347167969, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -103.45030212402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01036834716796875, + "rewards_train/margins": -0.015338134951889515, + "rewards_train/rejected": 0.004969787783920765, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -65.81903839111328, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -164.18328857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018096162006258965, + "rewards_train/margins": 0.036425020545721054, + "rewards_train/rejected": -0.01832885853946209, + "step": 24 + }, + { + "epoch": 0.01, + "logps_train/chosen": -11.556609153747559, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -34.2513542175293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005660915281623602, + "rewards_train/margins": -0.030525493901222944, + "rewards_train/rejected": 0.024864578619599342, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -137.21446228027344, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -133.34800720214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02144622802734375, + "rewards_train/margins": -0.08664550632238388, + "rewards_train/rejected": 0.06519927829504013, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -20.443626403808594, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -13.974607467651367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006862640380859375, + "rewards_train/margins": -0.0031518936157226562, + "rewards_train/rejected": -0.0037107467651367188, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -75.15701293945312, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -118.04328918457031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03429870679974556, + "rewards_train/margins": -0.01137237623333931, + "rewards_train/rejected": 0.04567108303308487, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -11.540498733520508, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -5.575167179107666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008450127206742764, + "rewards_train/margins": 0.006591845070943236, + "rewards_train/rejected": 0.0018582821357995272, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -160.45806884765625, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -138.71102905273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.045806884765625, + "rewards_train/margins": -0.07470398023724556, + "rewards_train/rejected": 0.02889709547162056, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -124.12043762207031, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -109.3013687133789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012043762020766735, + "rewards_train/margins": 0.01809311006218195, + "rewards_train/rejected": -0.030136872082948685, + "step": 25 + }, + { + "epoch": 0.01, + "logps_train/chosen": -73.68421936035156, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -104.83526611328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01842193678021431, + "rewards_train/margins": -0.03489532507956028, + "rewards_train/rejected": 0.01647338829934597, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 4.1599999999999997e-07, + "loss": 0.6983, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -110.72602844238281, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -109.88821411132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07260284572839737, + "rewards_train/margins": -0.08378143515437841, + "rewards_train/rejected": 0.011178589425981045, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -91.86065673828125, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -93.6588134765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03606567531824112, + "rewards_train/margins": -0.12018433213233948, + "rewards_train/rejected": 0.08411865681409836, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -3.5549519062042236, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -0.9296875, + "logps_train/rejected": -0.948858380317688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007548094145022333, + "rewards_train/margins": 0.0026718974695540965, + "rewards_train/rejected": -0.0019170880550518632, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -53.80691909790039, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -72.2258071899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04430809244513512, + "rewards_train/margins": 0.016888810321688652, + "rewards_train/rejected": 0.027419282123446465, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -78.32474517822266, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -106.31714630126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.017525482922792435, + "rewards_train/margins": -0.0007598865777254105, + "rewards_train/rejected": 0.018285369500517845, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -45.89180374145508, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -72.17566680908203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0891803726553917, + "rewards_train/margins": -0.12161369249224663, + "rewards_train/rejected": 0.032433319836854935, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -62.43342590332031, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -34.762821197509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006657409947365522, + "rewards_train/margins": 0.00793952972162515, + "rewards_train/rejected": -0.0012821197742596269, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -79.50250244140625, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -87.8802719116211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05025024339556694, + "rewards_train/margins": -0.062223052605986595, + "rewards_train/rejected": 0.011972809210419655, + "step": 26 + }, + { + "epoch": 0.01, + "logps_train/chosen": -8.388378143310547, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -4.091565132141113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011162186041474342, + "rewards_train/margins": 0.010943699249764904, + "rewards_train/rejected": 0.00021848679170943797, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -28.614473342895508, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -69.01295471191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0010526657570153475, + "rewards_train/margins": -0.04765186528675258, + "rewards_train/rejected": 0.04870453104376793, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -5.353400230407715, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -2.318328380584717, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0009650230640545487, + "rewards_train/margins": -0.001944685005582869, + "rewards_train/rejected": 0.0009796619415283203, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -88.70464324951172, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -67.0401382446289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.029535675421357155, + "rewards_train/margins": -0.01645050011575222, + "rewards_train/rejected": 0.045986175537109375, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -0.23728647828102112, + "logps_train/ref_chosen": -0.2490234375, + "logps_train/ref_rejected": -0.2490234375, + "logps_train/rejected": -0.24209129810333252, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0011736958986148238, + "rewards_train/margins": 0.0004804819473065436, + "rewards_train/rejected": 0.0006932139513082802, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -159.16720581054688, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -150.3579559326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01672058179974556, + "rewards_train/margins": 0.01907501369714737, + "rewards_train/rejected": -0.03579559549689293, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -21.991741180419922, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -14.877710342407227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011674118228256702, + "rewards_train/margins": -0.023903083987534046, + "rewards_train/rejected": 0.012228965759277344, + "step": 27 + }, + { + "epoch": 0.01, + "logps_train/chosen": -73.91768646240234, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -134.61785888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.058231353759765625, + "rewards_train/margins": 0.020017240196466446, + "rewards_train/rejected": 0.03821411356329918, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 4.48e-07, + "loss": 0.7064, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -8.628470420837402, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -1.3125, + "logps_train/rejected": -1.3346562385559082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01215295772999525, + "rewards_train/margins": 0.014368581585586071, + "rewards_train/rejected": -0.0022156238555908203, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -118.9875259399414, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -136.8642578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09875259548425674, + "rewards_train/margins": -0.21232681721448898, + "rewards_train/rejected": 0.11357422173023224, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -7.6791486740112305, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -33.158565521240234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003960132598876953, + "rewards_train/margins": -0.005183315835893154, + "rewards_train/rejected": 0.009143448434770107, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -8.052074432373047, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -34.151798248291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00520744314417243, + "rewards_train/margins": 0.009972381871193647, + "rewards_train/rejected": -0.015179825015366077, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -120.80763244628906, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -123.98497009277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03076324425637722, + "rewards_train/margins": 0.01773376576602459, + "rewards_train/rejected": -0.04849701002240181, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -133.54563903808594, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -136.49754333496094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05456390604376793, + "rewards_train/margins": -0.10480957478284836, + "rewards_train/rejected": 0.05024566873908043, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -121.9015884399414, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -138.10491943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00984115619212389, + "rewards_train/margins": 0.020333100110292435, + "rewards_train/rejected": -0.010491943918168545, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -107.68907165527344, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -76.18728637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03109283559024334, + "rewards_train/margins": 0.04982147365808487, + "rewards_train/rejected": -0.01872863806784153, + "step": 28 + }, + { + "epoch": 0.01, + "logps_train/chosen": -24.898860931396484, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -21.545360565185547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0023860931396484375, + "rewards_train/margins": -0.010350036434829235, + "rewards_train/rejected": 0.007963943295180798, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -2.4309558868408203, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -7.633415699005127, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008466911502182484, + "rewards_train/margins": 0.015558481682091951, + "rewards_train/rejected": -0.007091570179909468, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -0.8611130118370056, + "logps_train/ref_chosen": -0.85546875, + "logps_train/ref_rejected": -0.85546875, + "logps_train/rejected": -0.8601502180099487, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0005644261837005615, + "rewards_train/margins": -9.62793710641563e-05, + "rewards_train/rejected": -0.00046814681263640523, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -43.470787048339844, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -11.464357376098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0029212951194494963, + "rewards_train/margins": 0.030607032356783748, + "rewards_train/rejected": -0.02768573723733425, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -86.28929901123047, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -120.82516479492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021070098504424095, + "rewards_train/margins": 0.0035865772515535355, + "rewards_train/rejected": 0.01748352125287056, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -92.43061065673828, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -99.24169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006938934326171875, + "rewards_train/margins": 0.031108856201171875, + "rewards_train/rejected": -0.024169921875, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -97.20906066894531, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -74.8337173461914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02090606652200222, + "rewards_train/margins": -0.037534331902861595, + "rewards_train/rejected": 0.016628265380859375, + "step": 29 + }, + { + "epoch": 0.01, + "logps_train/chosen": -100.36041259765625, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -120.34904479980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.036041259765625, + "rewards_train/margins": -0.051136779598891735, + "rewards_train/rejected": 0.015095519833266735, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 4.8e-07, + "loss": 0.7009, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -67.67750549316406, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -125.88917541503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01775055006146431, + "rewards_train/margins": 0.02116699144244194, + "rewards_train/rejected": -0.03891754150390625, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -118.08879089355469, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -120.71748352050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04112091287970543, + "rewards_train/margins": 0.1128692664206028, + "rewards_train/rejected": -0.07174835354089737, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -22.24710464477539, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -20.158382415771484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.024710465222597122, + "rewards_train/margins": -0.008872224017977715, + "rewards_train/rejected": -0.015838241204619408, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -145.14305114746094, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -200.43316650390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01430511474609375, + "rewards_train/margins": -0.07098846510052681, + "rewards_train/rejected": 0.05668335035443306, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -116.27777099609375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -82.73098754882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02777709998190403, + "rewards_train/margins": -0.05467834509909153, + "rewards_train/rejected": 0.0269012451171875, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.554619312286377, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -10.162259101867676, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00858693104237318, + "rewards_train/margins": 0.0013889791443943977, + "rewards_train/rejected": -0.009975910186767578, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -12.388684272766113, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -18.424850463867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007618427276611328, + "rewards_train/margins": -0.002633380703628063, + "rewards_train/rejected": -0.004985046572983265, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.149576663970947, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -2.921875, + "logps_train/rejected": -2.8849523067474365, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006917333696037531, + "rewards_train/margins": 0.0032250643707811832, + "rewards_train/rejected": 0.0036922693252563477, + "step": 30 + }, + { + "epoch": 0.01, + "logps_train/chosen": -73.6880111694336, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -92.43028259277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018801117315888405, + "rewards_train/margins": 0.024227144196629524, + "rewards_train/rejected": -0.04302826151251793, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.90671157836914, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -51.628501892089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015578842721879482, + "rewards_train/margins": 0.0034290319308638573, + "rewards_train/rejected": 0.012149810791015625, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -9.642163276672363, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -12.674912452697754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0017163277370855212, + "rewards_train/margins": -0.03422508470248431, + "rewards_train/rejected": 0.03250875696539879, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -134.78770446777344, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -109.16853332519531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07877045124769211, + "rewards_train/margins": -0.11191711947321892, + "rewards_train/rejected": 0.03314666822552681, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -122.30989074707031, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -115.5475845336914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01901092566549778, + "rewards_train/margins": -0.026230620220303535, + "rewards_train/rejected": 0.045241545885801315, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -30.476530075073242, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -16.655853271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0023469924926757812, + "rewards_train/margins": 0.0054323195945471525, + "rewards_train/rejected": -0.0030853271018713713, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -12.862910270690918, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -12.557421684265137, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.017541026696562767, + "rewards_train/margins": -0.024298858363181353, + "rewards_train/rejected": 0.006757831666618586, + "step": 31 + }, + { + "epoch": 0.01, + "logps_train/chosen": -147.43280029296875, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -134.33566284179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05671996995806694, + "rewards_train/margins": 0.0902862548828125, + "rewards_train/rejected": -0.03356628492474556, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 5.12e-07, + "loss": 0.6959, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -7.073664665222168, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -3.0667381286621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00111646659206599, + "rewards_train/margins": -0.006942653912119567, + "rewards_train/rejected": 0.005826187320053577, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -21.04741096496582, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -15.740614891052246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007758903782814741, + "rewards_train/margins": 0.019320393446832895, + "rewards_train/rejected": -0.011561489664018154, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -71.53279113769531, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -75.68744659423828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003279113909229636, + "rewards_train/margins": -0.03453445597551763, + "rewards_train/rejected": 0.031255342066287994, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -92.7406234741211, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -62.718528747558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025937652215361595, + "rewards_train/margins": 0.02279052697122097, + "rewards_train/rejected": 0.003147125244140625, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -87.5625991821289, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.41243743896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043740082532167435, + "rewards_train/margins": 0.034983825869858265, + "rewards_train/rejected": 0.00875625666230917, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.5640482902526855, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -2.4376955032348633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003279829164966941, + "rewards_train/margins": -0.0032602788414806128, + "rewards_train/rejected": -1.9550323486328125e-05, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -102.69444274902344, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -115.05903625488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01944427564740181, + "rewards_train/margins": -0.06354065239429474, + "rewards_train/rejected": 0.04409637674689293, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -2.0504746437072754, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -12.038307189941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00035996438236907125, + "rewards_train/margins": -0.009029245760757476, + "rewards_train/rejected": 0.008669281378388405, + "step": 32 + }, + { + "epoch": 0.01, + "logps_train/chosen": -27.51464080810547, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -6.117469310760498, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.001464080880396068, + "rewards_train/margins": 0.007157850195653737, + "rewards_train/rejected": -0.008621931076049805, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -148.83319091796875, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -136.4168701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01668090932071209, + "rewards_train/margins": 0.05836792103946209, + "rewards_train/rejected": -0.04168701171875, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -16.132118225097656, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -32.73537826538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.049288179725408554, + "rewards_train/margins": 0.0478260061936453, + "rewards_train/rejected": 0.0014621735317632556, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -17.685253143310547, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -71.6927490234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0060253143310546875, + "rewards_train/margins": -0.03675041161477566, + "rewards_train/rejected": 0.03072509728372097, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -78.06813049316406, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.15701293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00681304931640625, + "rewards_train/margins": 0.00888824462890625, + "rewards_train/rejected": -0.0157012939453125, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -84.90255737304688, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -125.73103332519531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.009744263254106045, + "rewards_train/margins": -0.017152405343949795, + "rewards_train/rejected": 0.02689666859805584, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -94.21627044677734, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -94.62211608886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028372956439852715, + "rewards_train/margins": 0.04058456514030695, + "rewards_train/rejected": -0.012211608700454235, + "step": 33 + }, + { + "epoch": 0.01, + "logps_train/chosen": -78.57617950439453, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -79.43682098388672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007617950439453125, + "rewards_train/margins": -0.013935852330178022, + "rewards_train/rejected": 0.006317901890724897, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 5.44e-07, + "loss": 0.6918, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -93.18697357177734, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.22710418701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018697356805205345, + "rewards_train/margins": 0.0040130615234375, + "rewards_train/rejected": -0.022710418328642845, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -81.0748519897461, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.5840072631836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007485199254006147, + "rewards_train/margins": 0.0009155268780887127, + "rewards_train/rejected": -0.00840072613209486, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -17.189638137817383, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -7.47452974319458, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006036186125129461, + "rewards_train/margins": 0.003489160444587469, + "rewards_train/rejected": 0.002547025680541992, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -8.290661811828613, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -5.4375433921813965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004066181369125843, + "rewards_train/margins": -0.007186842150986195, + "rewards_train/rejected": 0.0031206607818603516, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -0.5867726802825928, + "logps_train/ref_chosen": -0.59375, + "logps_train/ref_rejected": -1.8828125, + "logps_train/rejected": -1.844475269317627, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0006977319717407227, + "rewards_train/margins": -0.0031359910499304533, + "rewards_train/rejected": 0.003833723021671176, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -203.2578125, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -170.47802734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12578125298023224, + "rewards_train/margins": -0.1779785193502903, + "rewards_train/rejected": 0.05219726637005806, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -9.217601776123047, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -10.76567554473877, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028239822015166283, + "rewards_train/margins": 0.02980737655889243, + "rewards_train/rejected": -0.0015675545437261462, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -93.63701629638672, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -90.11674499511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036298371851444244, + "rewards_train/margins": 0.09797287359833717, + "rewards_train/rejected": -0.06167450174689293, + "step": 34 + }, + { + "epoch": 0.01, + "logps_train/chosen": -179.15719604492188, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -198.91726684570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01571960560977459, + "rewards_train/margins": 0.07600707747042179, + "rewards_train/rejected": -0.09172668308019638, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -117.65460205078125, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -166.41036987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015460205264389515, + "rewards_train/margins": 0.025576784275472164, + "rewards_train/rejected": -0.04103698953986168, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -1.3752633333206177, + "logps_train/ref_chosen": -1.3984375, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -3.02219557762146, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002317416714504361, + "rewards_train/margins": 0.0060994746163487434, + "rewards_train/rejected": -0.0037820579018443823, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -2.9168241024017334, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -16.10137939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0020675898995250463, + "rewards_train/margins": 0.018455529352650046, + "rewards_train/rejected": -0.016387939453125, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -2.9409680366516113, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -73.28240966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009028196334838867, + "rewards_train/margins": 0.03726916387677193, + "rewards_train/rejected": -0.02824096754193306, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -24.073863983154297, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -3.887723445892334, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0051136016845703125, + "rewards_train/margins": -0.004551554098725319, + "rewards_train/rejected": 0.009665155783295631, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -68.41253662109375, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -130.5247802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008746338076889515, + "rewards_train/margins": 0.061224364675581455, + "rewards_train/rejected": -0.05247802659869194, + "step": 35 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.936906814575195, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -14.935341835021973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006190681364387274, + "rewards_train/margins": 0.006093502510339022, + "rewards_train/rejected": -0.012284183874726295, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 5.76e-07, + "loss": 0.6883, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -7.628444194793701, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -15.660205841064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0003444194735493511, + "rewards_train/margins": -0.009323835925897583, + "rewards_train/rejected": 0.008979416452348232, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -123.72211456298828, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -43.92236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027788544073700905, + "rewards_train/margins": 0.045024871826171875, + "rewards_train/rejected": -0.01723632775247097, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -72.29505920410156, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -84.85479736328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02950592152774334, + "rewards_train/margins": -0.044026185758411884, + "rewards_train/rejected": 0.014520264230668545, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -10.529850006103516, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -9.278470993041992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0029850006103515625, + "rewards_train/margins": -0.006387901259586215, + "rewards_train/rejected": 0.0034029006492346525, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.248810768127441, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -6.211826324462891, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01238107681274414, + "rewards_train/margins": -0.013073444366455078, + "rewards_train/rejected": 0.0006923675537109375, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -17.81317901611328, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -23.51946449279785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006317901890724897, + "rewards_train/margins": -0.02937145298346877, + "rewards_train/rejected": 0.023053551092743874, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -85.23844909667969, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -108.99869537353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02615508995950222, + "rewards_train/margins": 0.07602462731301785, + "rewards_train/rejected": -0.049869537353515625, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -63.99149703979492, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -53.642601013183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05085029825568199, + "rewards_train/margins": 0.0901103988289833, + "rewards_train/rejected": -0.039260100573301315, + "step": 36 + }, + { + "epoch": 0.01, + "logps_train/chosen": -7.3462605476379395, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -15.691478729248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01224894542247057, + "rewards_train/margins": 0.03139681834727526, + "rewards_train/rejected": -0.019147872924804688, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.34280014038086, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -84.380126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028219986706972122, + "rewards_train/margins": 0.016232681460678577, + "rewards_train/rejected": 0.011987305246293545, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -98.98246002197266, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -52.116119384765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.048246003687381744, + "rewards_train/margins": -0.011634062975645065, + "rewards_train/rejected": -0.03661194071173668, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -33.41319274902344, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -31.122831344604492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01631927490234375, + "rewards_train/margins": -0.04153614118695259, + "rewards_train/rejected": 0.02521686628460884, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -111.45819091796875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -89.68302154541016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.004180908203125, + "rewards_train/margins": -0.027516938745975494, + "rewards_train/rejected": 0.031697846949100494, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -18.86278533935547, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -15.845907211303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0012214661110192537, + "rewards_train/margins": 0.010812187800183892, + "rewards_train/rejected": -0.009590721689164639, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -0.47504928708076477, + "logps_train/ref_chosen": -0.4765625, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -7.566366672515869, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00015132129192352295, + "rewards_train/margins": 0.009912989102303982, + "rewards_train/rejected": -0.009761667810380459, + "step": 37 + }, + { + "epoch": 0.01, + "logps_train/chosen": -25.761476516723633, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -35.47407531738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0011476516956463456, + "rewards_train/margins": 0.021259880042634904, + "rewards_train/rejected": -0.02240753173828125, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 6.079999999999999e-07, + "loss": 0.6907, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -64.80038452148438, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -14.274276733398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03003845177590847, + "rewards_train/margins": -0.027610778342932463, + "rewards_train/rejected": -0.0024276734329760075, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -105.23968505859375, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -92.69600677490234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02396850660443306, + "rewards_train/margins": -0.054367829114198685, + "rewards_train/rejected": 0.030399322509765625, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -101.3003921508789, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -116.15974426269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019960785284638405, + "rewards_train/margins": 0.035935211926698685, + "rewards_train/rejected": -0.01597442664206028, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -15.978134155273438, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -27.59548568725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03968658670783043, + "rewards_train/margins": 0.024235155433416367, + "rewards_train/rejected": 0.015451431274414062, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.351454734802246, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -8.187297821044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0038954734336584806, + "rewards_train/margins": -0.003915691329893889, + "rewards_train/rejected": 2.021789623540826e-05, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -20.69276237487793, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -5.709969997406006, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006776237394660711, + "rewards_train/margins": -0.0014042374677956104, + "rewards_train/rejected": -0.005371999926865101, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -210.009765625, + "logps_train/ref_chosen": -211.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -196.732177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09902343899011612, + "rewards_train/margins": 0.07224121131002903, + "rewards_train/rejected": 0.02678222768008709, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.316786766052246, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -17.280811309814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01832132413983345, + "rewards_train/margins": 0.02140245516784489, + "rewards_train/rejected": -0.0030811310280114412, + "step": 38 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.7690629959106445, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -12.067887306213379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0012187004322186112, + "rewards_train/margins": 0.014257431612350047, + "rewards_train/rejected": -0.013038731180131435, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -52.42662048339844, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -12.832316398620605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03233795240521431, + "rewards_train/margins": 0.015569591894745827, + "rewards_train/rejected": 0.016768360510468483, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.113971710205078, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -7.6758809089660645, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0020221711602061987, + "rewards_train/margins": 0.012440920108929276, + "rewards_train/rejected": -0.014463091269135475, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -159.5255126953125, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -145.15872192382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04744873195886612, + "rewards_train/margins": -0.03667907416820526, + "rewards_train/rejected": 0.08412780612707138, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -90.69173431396484, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -91.36184692382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019173432141542435, + "rewards_train/margins": 0.0670112632215023, + "rewards_train/rejected": -0.08618469536304474, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -103.78752136230469, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -108.24735260009766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02875213697552681, + "rewards_train/margins": -0.004016876220703125, + "rewards_train/rejected": -0.024735260754823685, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -135.2486572265625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -105.88690948486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02486572228372097, + "rewards_train/margins": 0.013825228437781334, + "rewards_train/rejected": -0.038690950721502304, + "step": 39 + }, + { + "epoch": 0.01, + "logps_train/chosen": -2.3888299465179443, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -8.024480819702148, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009195494465529919, + "rewards_train/margins": -0.006747412495315075, + "rewards_train/rejected": -0.0024480819702148438, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 6.4e-07, + "loss": 0.6887, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -118.11988830566406, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -127.58082580566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06198883056640625, + "rewards_train/margins": -0.00390625, + "rewards_train/rejected": -0.05808258056640625, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -3.6156868934631348, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -1.1796875, + "logps_train/rejected": -1.2602124214172363, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013131189160048962, + "rewards_train/margins": -0.005078696645796299, + "rewards_train/rejected": -0.008052492514252663, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -109.11966705322266, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -132.63330078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011966705322265625, + "rewards_train/margins": -0.048636626452207565, + "rewards_train/rejected": 0.03666992112994194, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -131.30947875976562, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -169.77874755859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03094787709414959, + "rewards_train/margins": -0.05307312123477459, + "rewards_train/rejected": 0.022125244140625, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.21550178527832, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -35.77002716064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09655018150806427, + "rewards_train/margins": -0.044547464698553085, + "rewards_train/rejected": -0.052002716809511185, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -1.4292747974395752, + "logps_train/ref_chosen": -1.453125, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -14.517354965209961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0023850202560424805, + "rewards_train/margins": 0.004120516823604703, + "rewards_train/rejected": -0.0017354965675622225, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -118.64765167236328, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -100.88207244873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.035234834998846054, + "rewards_train/margins": 0.023442079313099384, + "rewards_train/rejected": 0.01179275568574667, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -24.037914276123047, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -6.073657035827637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021208573132753372, + "rewards_train/margins": 0.019199276575818658, + "rewards_train/rejected": 0.0020092965569347143, + "step": 40 + }, + { + "epoch": 0.01, + "logps_train/chosen": -94.45803833007812, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -120.98654174804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04580383375287056, + "rewards_train/margins": -0.04714965901803225, + "rewards_train/rejected": 0.001345825265161693, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -27.788074493408203, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -32.92328643798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04130744934082031, + "rewards_train/margins": -0.023978805169463158, + "rewards_train/rejected": -0.017328644171357155, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -159.7755889892578, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -164.10342407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02244110219180584, + "rewards_train/margins": 0.032783509232103825, + "rewards_train/rejected": -0.010342407040297985, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -76.3355941772461, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -120.4010009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016440583392977715, + "rewards_train/margins": 0.056540681049227715, + "rewards_train/rejected": -0.04010009765625, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -109.2108154296875, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -180.87667846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02891845814883709, + "rewards_train/margins": 0.016586304642260075, + "rewards_train/rejected": 0.012332153506577015, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -38.820579528808594, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -6.125005722045898, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007057953160256147, + "rewards_train/margins": 0.008567618671804667, + "rewards_train/rejected": -0.015625571832060814, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -135.28518676757812, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -131.5513916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07148132473230362, + "rewards_train/margins": 0.02662048488855362, + "rewards_train/rejected": 0.04486083984375, + "step": 41 + }, + { + "epoch": 0.01, + "logps_train/chosen": -3.277263879776001, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -2.0733346939086914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.002726387931033969, + "rewards_train/margins": -0.006330418633297086, + "rewards_train/rejected": 0.003604030702263117, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 6.72e-07, + "loss": 0.6949, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -75.22026062011719, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -51.90651321411133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02202606201171875, + "rewards_train/margins": -0.03137474041432142, + "rewards_train/rejected": 0.009348678402602673, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -83.26348114013672, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -143.13351440429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.026348114013671875, + "rewards_train/margins": -0.012996673583984375, + "rewards_train/rejected": -0.0133514404296875, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -10.584492683410645, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -12.241700172424316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0021992684341967106, + "rewards_train/margins": -0.0030292511801235378, + "rewards_train/rejected": 0.0008299827459268272, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.338540554046631, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -3.135849952697754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0005209445953369141, + "rewards_train/margins": 0.0016059399349614978, + "rewards_train/rejected": -0.0010849953396245837, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -14.902956008911133, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -6.693886756896973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02220439910888672, + "rewards_train/margins": 0.016593074891716242, + "rewards_train/rejected": 0.005611324217170477, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -72.8499755859375, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -11.482572555541992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03499756008386612, + "rewards_train/margins": -0.024240304715931416, + "rewards_train/rejected": -0.010757255367934704, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -173.51504516601562, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -158.0736083984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05150451883673668, + "rewards_train/margins": -0.04414367908611894, + "rewards_train/rejected": -0.0073608397506177425, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -67.8600845336914, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -121.52295684814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01399154681712389, + "rewards_train/margins": 0.016287231585010886, + "rewards_train/rejected": -0.0022956847678869963, + "step": 42 + }, + { + "epoch": 0.01, + "logps_train/chosen": -73.58153533935547, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -65.53973388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.041846465319395065, + "rewards_train/margins": 0.095819853246212, + "rewards_train/rejected": -0.05397338792681694, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.016408920288086, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -1.5, + "logps_train/rejected": -1.5070568323135376, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0016408920055255294, + "rewards_train/margins": -0.0009352087508887053, + "rewards_train/rejected": -0.0007056832546368241, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -23.760940551757812, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -20.395729064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01140594482421875, + "rewards_train/margins": 0.013478851411491632, + "rewards_train/rejected": -0.0020729065872728825, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -16.433197021484375, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -12.012526512145996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006680298130959272, + "rewards_train/margins": 0.0016829492524266243, + "rewards_train/rejected": 0.004997348878532648, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -5.904240608215332, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -11.131815910339355, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012299060821533203, + "rewards_train/margins": -0.024117469787597656, + "rewards_train/rejected": 0.011818408966064453, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -86.91333770751953, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -118.40896606445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00866622943431139, + "rewards_train/margins": 0.04956283513456583, + "rewards_train/rejected": -0.04089660570025444, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -32.913124084472656, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -5.11530876159668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.033687591552734375, + "rewards_train/margins": 0.04209346789866686, + "rewards_train/rejected": -0.008405876345932484, + "step": 43 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.267614841461182, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -4.735760688781738, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.001363515853881836, + "rewards_train/margins": -0.006310415454208851, + "rewards_train/rejected": 0.007673931308090687, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 7.04e-07, + "loss": 0.6908, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -36.79411315917969, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -51.505184173583984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02941131591796875, + "rewards_train/margins": -0.028892898524645716, + "rewards_train/rejected": -0.000518417393323034, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -5.218532085418701, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -5.367963790893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015646791085600853, + "rewards_train/margins": 0.02744317054748535, + "rewards_train/rejected": -0.011796379461884499, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -16.810998916625977, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -13.616236686706543, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018599892035126686, + "rewards_train/margins": -0.031976223923265934, + "rewards_train/rejected": 0.013376331888139248, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -103.01310729980469, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -107.17572021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0013107300037518144, + "rewards_train/margins": 0.016261291108094156, + "rewards_train/rejected": -0.01757202111184597, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -140.79962158203125, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -110.65513610839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02003784291446209, + "rewards_train/margins": -0.01444854773581028, + "rewards_train/rejected": 0.03448639065027237, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -0.9632604122161865, + "logps_train/ref_chosen": -0.99609375, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -4.605460166931152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003283333731815219, + "rewards_train/margins": -0.014295649947598577, + "rewards_train/rejected": 0.017578983679413795, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -103.70198822021484, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -164.33468627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07980117946863174, + "rewards_train/margins": 0.013269804418087006, + "rewards_train/rejected": 0.06653137505054474, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -143.35804748535156, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -109.31285095214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03580474853515625, + "rewards_train/margins": -0.0545196533203125, + "rewards_train/rejected": 0.01871490478515625, + "step": 44 + }, + { + "epoch": 0.01, + "logps_train/chosen": -75.89673614501953, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -142.76296997070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.039673615247011185, + "rewards_train/margins": 0.036623384803533554, + "rewards_train/rejected": -0.07629700005054474, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -81.66342163085938, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -86.4277114868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03365783765912056, + "rewards_train/margins": 0.026428986340761185, + "rewards_train/rejected": 0.007228851318359375, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -120.48675537109375, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -157.18215942382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0013244629371911287, + "rewards_train/margins": 0.11954040382988751, + "rewards_train/rejected": -0.11821594089269638, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -131.85015869140625, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -153.76007080078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.014984130859375, + "rewards_train/margins": -0.00900878943502903, + "rewards_train/rejected": 0.02399292029440403, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -117.78607940673828, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -109.57647705078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.028607940301299095, + "rewards_train/margins": -0.02096023503690958, + "rewards_train/rejected": -0.007647705264389515, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -85.74759674072266, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -167.51417541503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.024759674444794655, + "rewards_train/margins": 0.026657866314053535, + "rewards_train/rejected": -0.05141754075884819, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -114.63554382324219, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -133.04391479492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06355438381433487, + "rewards_train/margins": -0.0591629040427506, + "rewards_train/rejected": -0.004391479771584272, + "step": 45 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.767439842224121, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -4.46417236328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007993984036147594, + "rewards_train/margins": -0.011576747754588723, + "rewards_train/rejected": 0.0035827637184411287, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 7.359999999999999e-07, + "loss": 0.6927, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -134.74566650390625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -130.41094970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02543335035443306, + "rewards_train/margins": 0.0665283203125, + "rewards_train/rejected": -0.04109496995806694, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.37995719909668, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -20.50189971923828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003620720002800226, + "rewards_train/margins": -0.0159307480789721, + "rewards_train/rejected": 0.012310028076171875, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -115.27470397949219, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -99.91522216796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02252960205078125, + "rewards_train/margins": -0.03594818338751793, + "rewards_train/rejected": 0.05847778543829918, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -69.01554870605469, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -106.41905212402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0015548706287518144, + "rewards_train/margins": -0.009649658226408064, + "rewards_train/rejected": 0.00809478759765625, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -71.14198303222656, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -63.56684875488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01419830322265625, + "rewards_train/margins": -0.03251342847943306, + "rewards_train/rejected": 0.01831512525677681, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -97.66185760498047, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -115.77033233642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08381424099206924, + "rewards_train/margins": 0.06084747426211834, + "rewards_train/rejected": 0.022966766729950905, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -3.312511682510376, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -9.872771263122559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0015636682510375977, + "rewards_train/margins": 0.010713458061218262, + "rewards_train/rejected": -0.01227712631225586, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -106.63450622558594, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -194.33047485351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01345062255859375, + "rewards_train/margins": -0.08040314167737961, + "rewards_train/rejected": 0.06695251911878586, + "step": 46 + }, + { + "epoch": 0.01, + "logps_train/chosen": -5.641976833343506, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -17.5329647064209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0016976833576336503, + "rewards_train/margins": 0.014098788029514253, + "rewards_train/rejected": -0.015796471387147903, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -108.82098388671875, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -101.41925048828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03209839016199112, + "rewards_train/margins": -0.040173341520130634, + "rewards_train/rejected": 0.008074951358139515, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -81.02127838134766, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -98.13954162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0021278380881994963, + "rewards_train/margins": 0.06182632897980511, + "rewards_train/rejected": -0.06395416706800461, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -3.8769822120666504, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -9.342905044555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008010721765458584, + "rewards_train/margins": 0.013779782690107822, + "rewards_train/rejected": -0.021790504455566406, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -86.64469909667969, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -74.13996887207031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014469909481704235, + "rewards_train/margins": -0.0004730224609375, + "rewards_train/rejected": -0.013996887020766735, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.545053005218506, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -3.923856258392334, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0013803004985675216, + "rewards_train/margins": -0.024619674659334123, + "rewards_train/rejected": 0.0232393741607666, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -94.80706787109375, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -110.24314880371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03070678748190403, + "rewards_train/margins": -0.00639190711081028, + "rewards_train/rejected": -0.02431488037109375, + "step": 47 + }, + { + "epoch": 0.01, + "logps_train/chosen": -76.90155029296875, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -120.50487518310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.009844970889389515, + "rewards_train/margins": -0.08966750930994749, + "rewards_train/rejected": 0.099512480199337, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 7.68e-07, + "loss": 0.6966, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -2.8076207637786865, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -16.05707550048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002050423761829734, + "rewards_train/margins": -0.004742026096209884, + "rewards_train/rejected": 0.0067924498580396175, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -106.14033508300781, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -203.227783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03596649318933487, + "rewards_train/margins": 0.05874481424689293, + "rewards_train/rejected": -0.02277832105755806, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -96.46379852294922, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -105.66545867919922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003620147705078125, + "rewards_train/margins": -0.079833984375, + "rewards_train/rejected": 0.08345413208007812, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -7.00877571105957, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -17.90420913696289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005372428800910711, + "rewards_train/margins": 0.03329334361478686, + "rewards_train/rejected": -0.027920914813876152, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -126.70915985107422, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -126.53848266601562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.020915985107421875, + "rewards_train/margins": -0.017067718552425504, + "rewards_train/rejected": -0.0038482665549963713, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -95.7788314819336, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -125.34846496582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.027883147820830345, + "rewards_train/margins": -0.04303665179759264, + "rewards_train/rejected": 0.015153503976762295, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -89.11371612548828, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -120.76435852050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011371612548828125, + "rewards_train/margins": 0.015064239501953125, + "rewards_train/rejected": -0.02643585205078125, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -38.44205093383789, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -115.33572387695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00579490652307868, + "rewards_train/margins": -0.01063270540907979, + "rewards_train/rejected": 0.01642761193215847, + "step": 48 + }, + { + "epoch": 0.01, + "logps_train/chosen": -140.05715942382812, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -82.57025909423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0057159424759447575, + "rewards_train/margins": 0.0013099671341478825, + "rewards_train/rejected": -0.00702590961009264, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -89.33367919921875, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -80.30780792236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016632080078125, + "rewards_train/margins": 0.047412872314453125, + "rewards_train/rejected": -0.030780792236328125, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -148.99217224121094, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -159.06207275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007827758672647178, + "rewards_train/margins": 0.00699005153728649, + "rewards_train/rejected": -0.006207275670021772, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -6.018120288848877, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -22.757299423217773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010687971487641335, + "rewards_train/margins": 0.011417913832701743, + "rewards_train/rejected": -0.0007299423450604081, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -10.73378849029541, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -4.576326370239258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0108788488432765, + "rewards_train/margins": -0.006371211726218462, + "rewards_train/rejected": -0.004507637117058039, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -83.44844055175781, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -103.71708679199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05515594407916069, + "rewards_train/margins": 0.02686462365090847, + "rewards_train/rejected": 0.02829132042825222, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -78.710205078125, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -85.60196685791016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02102050743997097, + "rewards_train/margins": -0.060823822394013405, + "rewards_train/rejected": 0.039803314954042435, + "step": 49 + }, + { + "epoch": 0.01, + "logps_train/chosen": -97.57196044921875, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -144.94891357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04280395433306694, + "rewards_train/margins": 0.03769531147554517, + "rewards_train/rejected": 0.005108642857521772, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 8e-07, + "loss": 0.6929, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -158.0731201171875, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -164.09329223632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007312011905014515, + "rewards_train/margins": 0.002017212100327015, + "rewards_train/rejected": -0.00932922400534153, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -5.191544055938721, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -2.71875, + "logps_train/rejected": -2.780076026916504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0004044056113343686, + "rewards_train/margins": 0.005728197080316022, + "rewards_train/rejected": -0.006132602691650391, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -85.6220932006836, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -118.60665130615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.037790682166814804, + "rewards_train/margins": 0.048455812968313694, + "rewards_train/rejected": -0.01066513080149889, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -101.516845703125, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -79.94173431396484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0016845703357830644, + "rewards_train/margins": -0.007511139032430947, + "rewards_train/rejected": 0.0058265686966478825, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -12.5022554397583, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -26.75, + "logps_train/rejected": -26.87921714782715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012274456210434437, + "rewards_train/margins": 0.02519617136567831, + "rewards_train/rejected": -0.012921715155243874, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -100.63439178466797, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -117.31089782714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013439178466796875, + "rewards_train/margins": -0.032349396497011185, + "rewards_train/rejected": 0.01891021803021431, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -52.189125061035156, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -98.61930847167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00608749408274889, + "rewards_train/margins": 0.01801834162324667, + "rewards_train/rejected": -0.01193084754049778, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -10.25748062133789, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -26.893770217895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018001938238739967, + "rewards_train/margins": 0.007378960028290749, + "rewards_train/rejected": 0.010622978210449219, + "step": 50 + }, + { + "epoch": 0.01, + "logps_train/chosen": -79.66499328613281, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -102.73316192626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01649932935833931, + "rewards_train/margins": 0.006816864013671875, + "rewards_train/rejected": -0.023316193372011185, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -87.50811004638672, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -159.8053436279297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0008110046619549394, + "rewards_train/margins": -0.02027664298657328, + "rewards_train/rejected": 0.01946563832461834, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -24.012115478515625, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -10.665209770202637, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013711548410356045, + "rewards_train/margins": -0.0159405714366585, + "rewards_train/rejected": 0.002229023026302457, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -110.34353637695312, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -161.7950897216797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03435363993048668, + "rewards_train/margins": -0.05484466813504696, + "rewards_train/rejected": 0.02049102820456028, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -17.012340545654297, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -19.278247833251953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0012340545654296875, + "rewards_train/margins": -0.035909272730350494, + "rewards_train/rejected": 0.03467521816492081, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.336178779602051, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -6.5618720054626465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0007571220630779862, + "rewards_train/margins": -0.008680677390657365, + "rewards_train/rejected": 0.009437799453735352, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -87.78451538085938, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -101.44874572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02845153771340847, + "rewards_train/margins": 0.01642303727567196, + "rewards_train/rejected": -0.04487457498908043, + "step": 51 + }, + { + "epoch": 0.01, + "logps_train/chosen": -141.0021209716797, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -149.22703552246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00021209717669989914, + "rewards_train/margins": -0.07750854791083839, + "rewards_train/rejected": 0.07729645073413849, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 8.319999999999999e-07, + "loss": 0.6969, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -9.48755931854248, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -0.97265625, + "logps_train/rejected": -0.9567238092422485, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0012440681457519531, + "rewards_train/margins": -0.00034917599987238646, + "rewards_train/rejected": 0.0015932441456243396, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -89.46917724609375, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -88.64163970947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04691772535443306, + "rewards_train/margins": -0.0827537551522255, + "rewards_train/rejected": 0.035836029797792435, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -4.005533695220947, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.3688201904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011946630664169788, + "rewards_train/margins": 0.020703650079667568, + "rewards_train/rejected": -0.00875701941549778, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -60.71304702758789, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -56.324405670166016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04630470275878906, + "rewards_train/margins": -0.0388641357421875, + "rewards_train/rejected": -0.0074405670166015625, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -33.31157684326172, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -19.863862991333008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043842315673828125, + "rewards_train/margins": 0.005228612571954727, + "rewards_train/rejected": 0.0386137031018734, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -40.765079498291016, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -39.28165054321289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023492051288485527, + "rewards_train/margins": 0.0016571059823036194, + "rewards_train/rejected": 0.021834945306181908, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -33.64183044433594, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -27.40313148498535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01081695593893528, + "rewards_train/margins": 0.0011301040649414062, + "rewards_train/rejected": 0.009686851873993874, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -54.56451416015625, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -142.82408142089844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006451416295021772, + "rewards_train/margins": -0.024043274577707052, + "rewards_train/rejected": 0.01759185828268528, + "step": 52 + }, + { + "epoch": 0.01, + "logps_train/chosen": -136.44900512695312, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -155.598388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04490051418542862, + "rewards_train/margins": 0.0149383544921875, + "rewards_train/rejected": -0.05983886867761612, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -19.158525466918945, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -17.486469268798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.009147453121840954, + "rewards_train/margins": -0.017205621115863323, + "rewards_train/rejected": 0.026353074237704277, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -119.95043182373047, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -94.882080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0049568177200853825, + "rewards_train/margins": 0.04316482553258538, + "rewards_train/rejected": -0.0382080078125, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -89.63182067871094, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -57.83240509033203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01318206824362278, + "rewards_train/margins": -0.00494155939668417, + "rewards_train/rejected": -0.00824050884693861, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -10.788188934326172, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -9.466172218322754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0024311065208166838, + "rewards_train/margins": -0.013451672391965985, + "rewards_train/rejected": 0.01588277891278267, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -82.16392517089844, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -98.82293701171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01639251783490181, + "rewards_train/margins": -0.03409881703555584, + "rewards_train/rejected": 0.01770629920065403, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -12.432550430297852, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -10.262222290039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006744957063347101, + "rewards_train/margins": -0.010782815050333738, + "rewards_train/rejected": 0.01752777211368084, + "step": 53 + }, + { + "epoch": 0.01, + "logps_train/chosen": -185.69241333007812, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -197.0494384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03075866773724556, + "rewards_train/margins": 0.03570251539349556, + "rewards_train/rejected": -0.00494384765625, + "step": 53 + }, + { + "epoch": 0.02, + "learning_rate": 8.639999999999999e-07, + "loss": 0.6966, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -68.48625946044922, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -42.828826904296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0013740540016442537, + "rewards_train/margins": -0.015743256313726306, + "rewards_train/rejected": 0.01711731031537056, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -76.95735931396484, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -73.82266235351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.045735932886600494, + "rewards_train/margins": -0.06346969865262508, + "rewards_train/rejected": 0.01773376576602459, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -105.58357238769531, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -95.37347412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04164276272058487, + "rewards_train/margins": 0.028990174643695354, + "rewards_train/rejected": 0.012652588076889515, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -83.68962097167969, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -73.53971862792969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03103790245950222, + "rewards_train/margins": -0.06499023921787739, + "rewards_train/rejected": 0.09602814167737961, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -100.29979705810547, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -100.25775909423828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.029979705810546875, + "rewards_train/margins": -0.00420379638671875, + "rewards_train/rejected": -0.025775909423828125, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.8159472942352295, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -3.9749159812927246, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0003447294293437153, + "rewards_train/margins": -0.0091031318588648, + "rewards_train/rejected": 0.008758402429521084, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -83.88623046875, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -75.3375015258789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.011376953683793545, + "rewards_train/margins": -0.00487289484590292, + "rewards_train/rejected": 0.016249848529696465, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -130.48338317871094, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -104.87113952636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05166168138384819, + "rewards_train/margins": 0.03877563402056694, + "rewards_train/rejected": 0.01288604736328125, + "step": 54 + }, + { + "epoch": 0.02, + "logps_train/chosen": -112.3721694946289, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -134.6772003173828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.012783050537109375, + "rewards_train/margins": -0.019496917724609375, + "rewards_train/rejected": 0.03227996826171875, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -58.943443298339844, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -16.670846939086914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.019344329833984375, + "rewards_train/margins": -0.027259635739028454, + "rewards_train/rejected": 0.007915305905044079, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -77.375732421875, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -122.08750915527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03757324442267418, + "rewards_train/margins": -0.07882232964038849, + "rewards_train/rejected": 0.04124908521771431, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -100.80888366699219, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -104.35118103027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03088836744427681, + "rewards_train/margins": -0.045770264230668545, + "rewards_train/rejected": 0.014881896786391735, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -61.29552459716797, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -116.2197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020447541028261185, + "rewards_train/margins": 0.042420197278261185, + "rewards_train/rejected": -0.02197265625, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.442773818969727, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -5.644632816314697, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0005273818969726562, + "rewards_train/margins": 0.010810899548232555, + "rewards_train/rejected": -0.011338281445205212, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -26.3057918548584, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -26.267261505126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018079185858368874, + "rewards_train/margins": -0.003853035159409046, + "rewards_train/rejected": -0.014226150698959827, + "step": 55 + }, + { + "epoch": 0.02, + "logps_train/chosen": -13.144659042358398, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -16.95431900024414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014465904794633389, + "rewards_train/margins": -0.031534005887806416, + "rewards_train/rejected": 0.017068101093173027, + "step": 55 + }, + { + "epoch": 0.02, + "learning_rate": 8.96e-07, + "loss": 0.7011, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -19.484460830688477, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.4863433837890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.023446083068847656, + "rewards_train/margins": -0.02793674496933818, + "rewards_train/rejected": 0.004490661900490522, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.8585613369941711, + "logps_train/ref_chosen": -0.8671875, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -3.2262415885925293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0008626162889413536, + "rewards_train/margins": -0.004638225131202489, + "rewards_train/rejected": 0.005500841420143843, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -8.496428489685059, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -19.968032836914062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.012857151217758656, + "rewards_train/margins": -0.015339565463364124, + "rewards_train/rejected": 0.02819671668112278, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -54.96247100830078, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -54.88145446777344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0037528991233557463, + "rewards_train/margins": -0.008101654471829534, + "rewards_train/rejected": 0.01185455359518528, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -28.84903335571289, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -1.40625, + "logps_train/rejected": -1.5789021253585815, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0025966644752770662, + "rewards_train/margins": 0.01986187812872231, + "rewards_train/rejected": -0.017265213653445244, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -72.83161163330078, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -73.99799346923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016838837414979935, + "rewards_train/margins": 0.06663818657398224, + "rewards_train/rejected": -0.049799349159002304, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -9.324195861816406, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -14.657978057861328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007419586181640625, + "rewards_train/margins": -0.04162178188562393, + "rewards_train/rejected": 0.03420219570398331, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -105.86416625976562, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -96.35916137695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03641662746667862, + "rewards_train/margins": -0.0005004890263080597, + "rewards_train/rejected": -0.03591613844037056, + "step": 56 + }, + { + "epoch": 0.02, + "logps_train/chosen": -88.2986068725586, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -125.9531478881836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0701393112540245, + "rewards_train/margins": 0.06545409979298711, + "rewards_train/rejected": 0.004685211461037397, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -63.85280990600586, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -18.379335403442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.035280991345644, + "rewards_train/margins": -0.03484745099558495, + "rewards_train/rejected": -0.00043354035005904734, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -12.259639739990234, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -15.752548217773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0009639739873819053, + "rewards_train/margins": -0.013209152210038155, + "rewards_train/rejected": 0.01224517822265625, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.876894474029541, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -84.79141235351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0029355527367442846, + "rewards_train/margins": -0.017923211911693215, + "rewards_train/rejected": 0.0208587646484375, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.604341506958008, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -9.149255752563477, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00206584925763309, + "rewards_train/margins": -0.001758575439453125, + "rewards_train/rejected": 0.003824424697086215, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.1267070770263672, + "logps_train/ref_chosen": -1.125, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -10.315733909606934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00017070770263671875, + "rewards_train/margins": -0.01859731785953045, + "rewards_train/rejected": 0.01842661015689373, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -112.91748809814453, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -102.65508270263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008251190185546875, + "rewards_train/margins": 0.023759461008012295, + "rewards_train/rejected": -0.01550827082246542, + "step": 57 + }, + { + "epoch": 0.02, + "logps_train/chosen": -66.08331298828125, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -117.43379211425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04166870191693306, + "rewards_train/margins": 0.03504791343584657, + "rewards_train/rejected": 0.0066207884810864925, + "step": 57 + }, + { + "epoch": 0.02, + "learning_rate": 9.28e-07, + "loss": 0.6923, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -15.039360046386719, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -16.658489227294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021063996478915215, + "rewards_train/margins": 0.02441291930153966, + "rewards_train/rejected": -0.003348922822624445, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -29.285438537597656, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -57.08667755126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021456146612763405, + "rewards_train/margins": 0.030123901553452015, + "rewards_train/rejected": -0.00866775494068861, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.814658164978027, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -5.710479736328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00021581650071311742, + "rewards_train/margins": -0.016667843985487707, + "rewards_train/rejected": 0.01645202748477459, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -18.42226791381836, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -36.243934631347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04527321085333824, + "rewards_train/margins": 0.0696666743606329, + "rewards_train/rejected": -0.024393463507294655, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.527792453765869, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -6.2351226806640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012154245749115944, + "rewards_train/margins": -0.023016978055238724, + "rewards_train/rejected": 0.01086273230612278, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -15.783935546875, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -35.57503890991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009106445126235485, + "rewards_train/margins": 0.016610336024314165, + "rewards_train/rejected": -0.00750389089807868, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -114.26302337646484, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -157.75115966796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.026302337646484375, + "rewards_train/margins": -0.051186371594667435, + "rewards_train/rejected": 0.02488403394818306, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -131.63832092285156, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -116.222412109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06383209675550461, + "rewards_train/margins": -0.1415908858180046, + "rewards_train/rejected": 0.0777587890625, + "step": 58 + }, + { + "epoch": 0.02, + "logps_train/chosen": -172.82086181640625, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -164.37222290039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08208618313074112, + "rewards_train/margins": -0.04486389085650444, + "rewards_train/rejected": -0.03722229227423668, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -131.656494140625, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -154.69140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03435058519244194, + "rewards_train/margins": 0.0034912098199129105, + "rewards_train/rejected": 0.03085937537252903, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -84.15182495117188, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -104.4298095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03481750562787056, + "rewards_train/margins": 0.02779846265912056, + "rewards_train/rejected": 0.00701904296875, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -10.804821014404297, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -17.867454528808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007678985712118447, + "rewards_train/margins": 0.01251335145207122, + "rewards_train/rejected": -0.011745452880859375, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.239141464233398, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -7.662576675415039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.017664147540926933, + "rewards_train/margins": -0.0014064796268939972, + "rewards_train/rejected": -0.016257667914032936, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -144.05111694335938, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -124.05584716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0051116943359375, + "rewards_train/margins": 0.05047302320599556, + "rewards_train/rejected": -0.05558471754193306, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -65.376220703125, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -77.7699966430664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.012377929873764515, + "rewards_train/margins": -0.01062240544706583, + "rewards_train/rejected": 0.023000335320830345, + "step": 59 + }, + { + "epoch": 0.02, + "logps_train/chosen": -12.37047004699707, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -28.624956130981445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005797004792839289, + "rewards_train/margins": -0.005801391694603808, + "rewards_train/rejected": 4.38690176451928e-06, + "step": 59 + }, + { + "epoch": 0.02, + "learning_rate": 9.6e-07, + "loss": 0.6951, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.220564842224121, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -8.453055381774902, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00018148422532249242, + "rewards_train/margins": 0.0013740539288846776, + "rewards_train/rejected": -0.00155553815420717, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.7385637760162354, + "logps_train/ref_chosen": -2.71875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -20.205074310302734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.001981377601623535, + "rewards_train/margins": -0.00647394685074687, + "rewards_train/rejected": 0.004492569249123335, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -5.315730571746826, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -11.191951751708984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0003230571746826172, + "rewards_train/margins": -0.006127882283180952, + "rewards_train/rejected": 0.005804825108498335, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -139.21701049804688, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -137.4015350341797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02170105092227459, + "rewards_train/margins": -0.18154755048453808, + "rewards_train/rejected": 0.1598464995622635, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -61.491416931152344, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -10.71921157836914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.049141693860292435, + "rewards_train/margins": -0.06472053658217192, + "rewards_train/rejected": 0.015578842721879482, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -40.78057861328125, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -79.41862487792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04694214090704918, + "rewards_train/margins": 0.038804628886282444, + "rewards_train/rejected": 0.008137512020766735, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.0535569190979004, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -8.009088516235352, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0037931918632239103, + "rewards_train/margins": -0.009134340332821012, + "rewards_train/rejected": 0.005341148469597101, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.42459601163864136, + "logps_train/ref_chosen": -0.458984375, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -12.230497360229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0034388364292681217, + "rewards_train/margins": 0.0014885724522173405, + "rewards_train/rejected": 0.0019502639770507812, + "step": 60 + }, + { + "epoch": 0.02, + "logps_train/chosen": -28.302490234375, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -12.709209442138672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007250976748764515, + "rewards_train/margins": -0.009328079409897327, + "rewards_train/rejected": 0.016579056158661842, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.25180983543396, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -12.64146900177002, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.001381516456604004, + "rewards_train/margins": 0.0030284166568890214, + "rewards_train/rejected": -0.0016469002002850175, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -56.56251525878906, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -79.474609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006251526065170765, + "rewards_train/margins": -0.058790587820112705, + "rewards_train/rejected": 0.05253906175494194, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -50.187278747558594, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -77.52682495117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018727874383330345, + "rewards_train/margins": -0.016045379219576716, + "rewards_train/rejected": -0.0026824951637536287, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.996564865112305, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -5.420083999633789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012156486511230469, + "rewards_train/margins": -0.00764808664098382, + "rewards_train/rejected": -0.004508399870246649, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -25.055072784423828, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -1.46875, + "logps_train/rejected": -1.5682165622711182, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04449272155761719, + "rewards_train/margins": 0.05443937797099352, + "rewards_train/rejected": -0.009946656413376331, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -20.79468536376953, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -16.246593475341797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016968537122011185, + "rewards_train/margins": -0.004809189587831497, + "rewards_train/rejected": -0.012159347534179688, + "step": 61 + }, + { + "epoch": 0.02, + "logps_train/chosen": -76.41690826416016, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -73.05087280273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00830917339771986, + "rewards_train/margins": 0.013396453578025103, + "rewards_train/rejected": -0.0050872801803052425, + "step": 61 + }, + { + "epoch": 0.02, + "learning_rate": 9.92e-07, + "loss": 0.7019, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -118.95081329345703, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -88.81721496582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0049186707474291325, + "rewards_train/margins": -0.013359833043068647, + "rewards_train/rejected": 0.01827850379049778, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -59.599754333496094, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -52.813072204589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009975433349609375, + "rewards_train/margins": -0.02866821363568306, + "rewards_train/rejected": 0.018692780286073685, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -15.2079439163208, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -8.33004093170166, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02920560911297798, + "rewards_train/margins": 0.018459701910614967, + "rewards_train/rejected": 0.010745907202363014, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -38.781864166259766, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -14.84359359741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021813584491610527, + "rewards_train/margins": 0.03742294479161501, + "rewards_train/rejected": -0.015609360300004482, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.706745624542236, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -11.081851959228516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0019245625007897615, + "rewards_train/margins": -0.006239366484805942, + "rewards_train/rejected": 0.00431480398401618, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.4329067468643188, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -7.962784290313721, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0018844247097149491, + "rewards_train/margins": -0.011855995864607394, + "rewards_train/rejected": 0.009971571154892445, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.1312179565429688, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -6.216396331787109, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0056282044388353825, + "rewards_train/margins": 0.021017837803810835, + "rewards_train/rejected": -0.015389633364975452, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -204.73825073242188, + "logps_train/ref_chosen": -205.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -157.68157958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02617492713034153, + "rewards_train/margins": 0.09433289058506489, + "rewards_train/rejected": -0.06815796345472336, + "step": 62 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.0254416465759277, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -14.413074493408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016205836087465286, + "rewards_train/margins": 0.001263285055756569, + "rewards_train/rejected": 0.014942551031708717, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.0743489265441895, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -27.16649627685547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016809893772006035, + "rewards_train/margins": -0.03766026720404625, + "rewards_train/rejected": 0.020850373432040215, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -89.72357177734375, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -117.14390563964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02764282189309597, + "rewards_train/margins": 0.042033386416733265, + "rewards_train/rejected": -0.014390564523637295, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.365323066711426, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -0.80078125, + "logps_train/rejected": -0.8072353601455688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025967692956328392, + "rewards_train/margins": 0.02661310398252681, + "rewards_train/rejected": -0.000645411026198417, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.9358408451080322, + "logps_train/ref_chosen": -1.9296875, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -6.358253479003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0006153345457278192, + "rewards_train/margins": -0.008539987204130739, + "rewards_train/rejected": 0.00792465265840292, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -16.87156105041504, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -28.51677894592285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012156105600297451, + "rewards_train/margins": 0.00202178955078125, + "rewards_train/rejected": -0.014177895151078701, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -5.369100570678711, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -25.940221786499023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006839943118393421, + "rewards_train/margins": 0.038362124003469944, + "rewards_train/rejected": -0.03152218088507652, + "step": 63 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.8952991962432861, + "logps_train/ref_chosen": -1.875, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -8.28269100189209, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0020299197640269995, + "rewards_train/margins": -0.023760819574818015, + "rewards_train/rejected": 0.021730899810791016, + "step": 63 + }, + { + "epoch": 0.02, + "learning_rate": 1.024e-06, + "loss": 0.6891, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.376885414123535, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -9.584962844848633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01543645840138197, + "rewards_train/margins": 0.02393274288624525, + "rewards_train/rejected": -0.008496284484863281, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -82.87020874023438, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -29.493221282958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06297912448644638, + "rewards_train/margins": 0.03730125166475773, + "rewards_train/rejected": 0.025677872821688652, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.930450439453125, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -7.567419052124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01320495642721653, + "rewards_train/margins": 0.032446861267089844, + "rewards_train/rejected": -0.019241904839873314, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.296095371246338, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -5.417124271392822, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0016404628986492753, + "rewards_train/margins": 0.00897789013106376, + "rewards_train/rejected": -0.007337427232414484, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -9.988425254821777, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -11.133045196533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0011574745876714587, + "rewards_train/margins": 0.0207119946135208, + "rewards_train/rejected": -0.019554520025849342, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -19.17919921875, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -12.328407287597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007080078125, + "rewards_train/margins": -0.010079193860292435, + "rewards_train/rejected": 0.017159271985292435, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.693944931030273, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -12.38117790222168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005605507176369429, + "rewards_train/margins": 0.01247329730540514, + "rewards_train/rejected": -0.006867790129035711, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -10.660486221313477, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -39.16155242919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015201377682387829, + "rewards_train/margins": 0.006356620229780674, + "rewards_train/rejected": 0.008844757452607155, + "step": 64 + }, + { + "epoch": 0.02, + "logps_train/chosen": -62.525447845458984, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -89.81455993652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0025447846855968237, + "rewards_train/margins": -0.021088791778311133, + "rewards_train/rejected": 0.01854400709271431, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -130.75035095214844, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -187.40789794921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02496490441262722, + "rewards_train/margins": -0.03424530290067196, + "rewards_train/rejected": 0.05921020731329918, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -92.81062316894531, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -98.91085052490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01893768273293972, + "rewards_train/margins": 0.010022735223174095, + "rewards_train/rejected": 0.008914947509765625, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -49.14984893798828, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -111.23822021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01498489361256361, + "rewards_train/margins": 0.00883712898939848, + "rewards_train/rejected": -0.02382202260196209, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.3259661197662354, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -13.529701232910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00490338820964098, + "rewards_train/margins": 0.014123511500656605, + "rewards_train/rejected": -0.009220123291015625, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -26.698530197143555, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -76.015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005146980285644531, + "rewards_train/margins": 0.006709480308927596, + "rewards_train/rejected": -0.0015625000232830644, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -113.3995132446289, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -129.09646606445312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.039951324462890625, + "rewards_train/margins": -0.03030471783131361, + "rewards_train/rejected": -0.009646606631577015, + "step": 65 + }, + { + "epoch": 0.02, + "logps_train/chosen": -107.27841186523438, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -125.60140991210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02784118615090847, + "rewards_train/margins": -0.017700194381177425, + "rewards_train/rejected": -0.010140991769731045, + "step": 65 + }, + { + "epoch": 0.02, + "learning_rate": 1.056e-06, + "loss": 0.6913, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -75.3726806640625, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -65.87580871582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01273193396627903, + "rewards_train/margins": 0.00031280517578125, + "rewards_train/rejected": 0.01241912879049778, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -98.35401153564453, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -112.26634216308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06459885090589523, + "rewards_train/margins": 0.04123306646943092, + "rewards_train/rejected": 0.02336578443646431, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.567976713180542, + "logps_train/ref_chosen": -1.6796875, + "logps_train/ref_rejected": -1.875, + "logps_train/rejected": -1.8988410234451294, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011171079240739346, + "rewards_train/margins": 0.013555181678384542, + "rewards_train/rejected": -0.002384102437645197, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -7.231442928314209, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -8.255255699157715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0018557071452960372, + "rewards_train/margins": 0.002381277095992118, + "rewards_train/rejected": -0.0005255699506960809, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -73.1683349609375, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -80.98141479492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01683349721133709, + "rewards_train/margins": -0.06869201920926571, + "rewards_train/rejected": 0.05185852199792862, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -94.52619934082031, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -75.71993255615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0026199340354651213, + "rewards_train/margins": 0.06937332008965313, + "rewards_train/rejected": -0.07199325412511826, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.116009712219238, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -12.001531600952148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005350971128791571, + "rewards_train/margins": 0.007302188780158758, + "rewards_train/rejected": -0.012653159908950329, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -60.15895080566406, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -70.04570770263672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01589508168399334, + "rewards_train/margins": -0.011324311140924692, + "rewards_train/rejected": -0.004570770543068647, + "step": 66 + }, + { + "epoch": 0.02, + "logps_train/chosen": -7.890032768249512, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -1.4921875, + "logps_train/rejected": -1.4413683414459229, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0015032768715173006, + "rewards_train/margins": -0.006585192633792758, + "rewards_train/rejected": 0.005081915762275457, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.5994677543640137, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -1.5703125, + "logps_train/rejected": -1.5809277296066284, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0009907245403155684, + "rewards_train/margins": 0.0020522475242614746, + "rewards_train/rejected": -0.0010615229839459062, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -8.089271545410156, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -8.828298568725586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018302155658602715, + "rewards_train/margins": -0.03547229990363121, + "rewards_train/rejected": 0.017170144245028496, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -28.79915428161621, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -3.1930463314056396, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004915428347885609, + "rewards_train/margins": 0.006576704792678356, + "rewards_train/rejected": -0.011492133140563965, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.2148215770721436, + "logps_train/ref_chosen": -1.25, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -15.78133773803711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003517842385917902, + "rewards_train/margins": -0.005848383996635675, + "rewards_train/rejected": 0.009366226382553577, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -30.978219985961914, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -35.784942626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0021780014503747225, + "rewards_train/margins": 0.0056722641456872225, + "rewards_train/rejected": -0.0034942626953125, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.33768081665039, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -6.598343849182129, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.027518082410097122, + "rewards_train/margins": -0.0270586974802427, + "rewards_train/rejected": -0.0004593849298544228, + "step": 67 + }, + { + "epoch": 0.02, + "logps_train/chosen": -114.50340270996094, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -132.3008575439453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0003402710135560483, + "rewards_train/margins": -0.07025451512890868, + "rewards_train/rejected": 0.06991424411535263, + "step": 67 + }, + { + "epoch": 0.02, + "learning_rate": 1.088e-06, + "loss": 0.6959, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -83.8695297241211, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -128.22482299804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.036952972412109375, + "rewards_train/margins": -0.11447067558765411, + "rewards_train/rejected": 0.07751770317554474, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -23.818492889404297, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -23.467426300048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005650711245834827, + "rewards_train/margins": 0.014893341809511185, + "rewards_train/rejected": -0.009242630563676357, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -12.65616512298584, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -9.20808219909668, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009366512298583984, + "rewards_train/margins": -0.0010582925751805305, + "rewards_train/rejected": -0.008308219723403454, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.530960559844971, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -1.78125, + "logps_train/rejected": -1.7688956260681152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 2.8944015866727568e-05, + "rewards_train/margins": -0.001206493447170942, + "rewards_train/rejected": 0.0012354374630376697, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -111.9646987915039, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -155.47811889648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.046469878405332565, + "rewards_train/margins": 0.0013420134782791138, + "rewards_train/rejected": -0.04781189188361168, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.6791642904281616, + "logps_train/ref_chosen": -0.69140625, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -14.111605644226074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0012241959339007735, + "rewards_train/margins": 0.006134760449640453, + "rewards_train/rejected": -0.004910564515739679, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.7326364517211914, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -3.087466239929199, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004513645078986883, + "rewards_train/margins": 0.0011079791001975536, + "rewards_train/rejected": -0.005621624179184437, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -105.96295166015625, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -112.18861389160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0037048340309411287, + "rewards_train/margins": -0.02743377792648971, + "rewards_train/rejected": 0.03113861195743084, + "step": 68 + }, + { + "epoch": 0.02, + "logps_train/chosen": -10.444042205810547, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -10.583678245544434, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.025654220953583717, + "rewards_train/margins": -0.0297863963060081, + "rewards_train/rejected": 0.004132175352424383, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -173.2327423095703, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -177.87060546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02327423170208931, + "rewards_train/margins": -0.03621368482708931, + "rewards_train/rejected": 0.012939453125, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -119.39685821533203, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -13.019905090332031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01031417865306139, + "rewards_train/margins": -0.00019531231373548508, + "rewards_train/rejected": 0.010509490966796875, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -90.79390716552734, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -78.09400177001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07060928642749786, + "rewards_train/margins": 0.08000946324318647, + "rewards_train/rejected": -0.00940017681568861, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -137.11871337890625, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -144.2430419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011871337890625, + "rewards_train/margins": 0.01243286207318306, + "rewards_train/rejected": -0.02430419996380806, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -131.31703186035156, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -99.24165344238281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03170318529009819, + "rewards_train/margins": -0.05753784067928791, + "rewards_train/rejected": 0.02583465538918972, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -100.82723999023438, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -83.54830169677734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01727600209414959, + "rewards_train/margins": -0.027893828228116035, + "rewards_train/rejected": 0.045169830322265625, + "step": 69 + }, + { + "epoch": 0.02, + "logps_train/chosen": -78.5433349609375, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.49932098388672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04566650465130806, + "rewards_train/margins": -0.004401396960020065, + "rewards_train/rejected": 0.050067901611328125, + "step": 69 + }, + { + "epoch": 0.02, + "learning_rate": 1.12e-06, + "loss": 0.6987, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -91.48188781738281, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -85.60877990722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.001811218331567943, + "rewards_train/margins": 0.012689209426753223, + "rewards_train/rejected": -0.01087799109518528, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -168.72817993164062, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -159.1347198486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02718200720846653, + "rewards_train/margins": 0.04065399244427681, + "rewards_train/rejected": -0.01347198523581028, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -75.18498229980469, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -100.6893081665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01849823072552681, + "rewards_train/margins": 0.00043258629739284515, + "rewards_train/rejected": -0.018930817022919655, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.679710030555725, + "logps_train/ref_chosen": -1.5859375, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -2.6225013732910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009377253241837025, + "rewards_train/margins": -0.011189615936018527, + "rewards_train/rejected": 0.0018123626941815019, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.361549139022827, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.254754543304443, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00802991446107626, + "rewards_train/margins": -0.010679460130631924, + "rewards_train/rejected": 0.002649545669555664, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -8.988234519958496, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -2.9390358924865723, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0011765480739995837, + "rewards_train/margins": 0.0013301373255671933, + "rewards_train/rejected": -0.0001535892515676096, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.918745040893555, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -3.922323226928711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011250495910644531, + "rewards_train/margins": 0.02223281841725111, + "rewards_train/rejected": -0.010982322506606579, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -98.77132415771484, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.86946868896484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.027132416144013405, + "rewards_train/margins": -0.040185547433793545, + "rewards_train/rejected": 0.01305313128978014, + "step": 70 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.1213395595550537, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -7.1987810134887695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009008956141769886, + "rewards_train/margins": 0.010869146324694157, + "rewards_train/rejected": -0.019878102466464043, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -80.37920379638672, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -95.82744598388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.037920381873846054, + "rewards_train/margins": 0.04482421651482582, + "rewards_train/rejected": -0.08274459838867188, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.673244476318359, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -10.087370872497559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013925552368164062, + "rewards_train/margins": 0.003912639804184437, + "rewards_train/rejected": 0.010012912563979626, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -13.102012634277344, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -16.345327377319336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01020126324146986, + "rewards_train/margins": -0.0006685256958007812, + "rewards_train/rejected": -0.009532737545669079, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.655864238739014, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.99020385742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 3.8576126826228574e-05, + "rewards_train/margins": -0.0009410382008354645, + "rewards_train/rejected": 0.000979614327661693, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -8.412379264831543, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -2.609375, + "logps_train/rejected": -2.674927234649658, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021262073889374733, + "rewards_train/margins": 0.027817297261208296, + "rewards_train/rejected": -0.006555223371833563, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -99.0604248046875, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -110.06694030761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00604248046875, + "rewards_train/margins": 0.0006515504792332649, + "rewards_train/rejected": -0.006694030947983265, + "step": 71 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.7696908712387085, + "logps_train/ref_chosen": -1.7578125, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -2.5476903915405273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0011878371005877852, + "rewards_train/margins": 0.012956202612258494, + "rewards_train/rejected": -0.01414403971284628, + "step": 71 + }, + { + "epoch": 0.02, + "learning_rate": 1.152e-06, + "loss": 0.6902, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.29560375213623047, + "logps_train/ref_chosen": -0.2734375, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -2.9654653072357178, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0022166252601891756, + "rewards_train/margins": -0.011920094722881913, + "rewards_train/rejected": 0.009703469462692738, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.871772289276123, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -13.10506820678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0003227710840292275, + "rewards_train/margins": 0.0045795918558724225, + "rewards_train/rejected": -0.004256820771843195, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -103.50657653808594, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -118.36065673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04934234544634819, + "rewards_train/margins": 0.035408019088208675, + "rewards_train/rejected": 0.013934326358139515, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -183.7730712890625, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -162.28598022460938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07730712741613388, + "rewards_train/margins": -0.04870910383760929, + "rewards_train/rejected": -0.02859802357852459, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -7.410890579223633, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -3.8740906715393066, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.012035942636430264, + "rewards_train/margins": -0.006804990582168102, + "rewards_train/rejected": 0.018840933218598366, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.3650097846984863, + "logps_train/ref_chosen": -3.328125, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -3.9636688232421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003688478609547019, + "rewards_train/margins": -0.0073215963784605265, + "rewards_train/rejected": 0.0036331177689135075, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -112.47897338867188, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -93.06594848632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0021026611793786287, + "rewards_train/margins": 0.008697509998455644, + "rewards_train/rejected": -0.006594848819077015, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -81.62109375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -106.10142517089844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012109375558793545, + "rewards_train/margins": -0.10196685697883368, + "rewards_train/rejected": 0.08985748142004013, + "step": 72 + }, + { + "epoch": 0.02, + "logps_train/chosen": -119.66593170166016, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -131.53274536132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016593171283602715, + "rewards_train/margins": 0.036681367084383965, + "rewards_train/rejected": -0.05327453836798668, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.16657829284668, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -15.357261657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008342171087861061, + "rewards_train/margins": 0.01906833704560995, + "rewards_train/rejected": -0.01072616595774889, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -157.43508911132812, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -214.0, + "logps_train/rejected": -214.32949829101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05649108812212944, + "rewards_train/margins": 0.08944091945886612, + "rewards_train/rejected": -0.03294983133673668, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -5.0102338790893555, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -9.339077949523926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008351611904799938, + "rewards_train/margins": 0.011009406996890903, + "rewards_train/rejected": -0.0026577950920909643, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -31.186626434326172, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -30.024539947509766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0061626434326171875, + "rewards_train/margins": -0.028708649799227715, + "rewards_train/rejected": 0.022546006366610527, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.893027305603027, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -5.420682907104492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.004447269719094038, + "rewards_train/margins": -0.00035943975672125816, + "rewards_train/rejected": 0.004806709475815296, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -53.313629150390625, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -125.09846496582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01863708533346653, + "rewards_train/margins": 0.028483581729233265, + "rewards_train/rejected": -0.009846496395766735, + "step": 73 + }, + { + "epoch": 0.02, + "logps_train/chosen": -15.278298377990723, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -6.63005256652832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015329837799072266, + "rewards_train/margins": -0.027324581518769264, + "rewards_train/rejected": 0.011994743719696999, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 1.1839999999999998e-06, + "loss": 0.6932, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -122.73284912109375, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -18.69930648803711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.023284912109375, + "rewards_train/margins": -0.02835426339879632, + "rewards_train/rejected": 0.00506935128942132, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.6906918287277222, + "logps_train/ref_chosen": -0.66015625, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -12.112189292907715, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003053557826206088, + "rewards_train/margins": -0.004334628582000732, + "rewards_train/rejected": 0.0012810707557946444, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -116.61072540283203, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -153.6632080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011072540655732155, + "rewards_train/margins": 0.055248258635401726, + "rewards_train/rejected": -0.06632079929113388, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.2897049188613892, + "logps_train/ref_chosen": -1.2421875, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -4.786895751953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004751741886138916, + "rewards_train/margins": -0.007312166737392545, + "rewards_train/rejected": 0.0025604248512536287, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -117.73336791992188, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -73.57160186767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02666320838034153, + "rewards_train/margins": 0.033823395147919655, + "rewards_train/rejected": -0.007160186767578125, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -109.53376007080078, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -138.90013122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.096623994410038, + "rewards_train/margins": 0.08663711696863174, + "rewards_train/rejected": 0.00998687744140625, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -109.19152069091797, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -109.90782165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.030847931280732155, + "rewards_train/margins": 0.02163009624928236, + "rewards_train/rejected": 0.009217835031449795, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -122.36981964111328, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -102.05306243896484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.036981966346502304, + "rewards_train/margins": -0.03167572245001793, + "rewards_train/rejected": -0.005306243896484375, + "step": 74 + }, + { + "epoch": 0.02, + "logps_train/chosen": -149.9438934326172, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -143.41683959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005610656924545765, + "rewards_train/margins": 0.047294617630541325, + "rewards_train/rejected": -0.04168396070599556, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -114.29731750488281, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -131.9805908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02026825025677681, + "rewards_train/margins": 0.11832733079791069, + "rewards_train/rejected": -0.09805908054113388, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -10.851556777954102, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -93.82379150390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0039056779351085424, + "rewards_train/margins": -0.021526528289541602, + "rewards_train/rejected": 0.01762085035443306, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -89.98745727539062, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -123.76983642578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04874572902917862, + "rewards_train/margins": -0.07176208682358265, + "rewards_train/rejected": 0.02301635779440403, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -81.84088134765625, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -38.255191802978516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.034088134765625, + "rewards_train/margins": -0.08356895670294762, + "rewards_train/rejected": 0.04948082193732262, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -104.42935943603516, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -107.18226623535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00706405658274889, + "rewards_train/margins": -0.02470932062715292, + "rewards_train/rejected": 0.03177337720990181, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -161.8651123046875, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -152.56893920898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01348876953125, + "rewards_train/margins": -0.0296173095703125, + "rewards_train/rejected": 0.0431060791015625, + "step": 75 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.91099739074707, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -7.619990825653076, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0026502609252929688, + "rewards_train/margins": -0.022850656881928444, + "rewards_train/rejected": 0.025500917807221413, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 1.2159999999999999e-06, + "loss": 0.6908, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -141.37594604492188, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -122.05956268310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06240539625287056, + "rewards_train/margins": 0.06836166465654969, + "rewards_train/rejected": -0.0059562684036791325, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -168.8846435546875, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -234.0, + "logps_train/rejected": -234.1553955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11153564602136612, + "rewards_train/margins": 0.12707519717514515, + "rewards_train/rejected": -0.01553955115377903, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.988000214099884, + "logps_train/ref_chosen": -0.9375, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -2.253138542175293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005050021689385176, + "rewards_train/margins": -0.015673667658120394, + "rewards_train/rejected": 0.010623645968735218, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -28.232336044311523, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -15.83677864074707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.023233605548739433, + "rewards_train/margins": 0.0041942596435546875, + "rewards_train/rejected": -0.02742786519229412, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.426746368408203, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -77.32386779785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007325363345444202, + "rewards_train/margins": 0.03971214313060045, + "rewards_train/rejected": -0.03238677978515625, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -137.92555236816406, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -90.90582275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10744476318359375, + "rewards_train/margins": 0.04802703857421875, + "rewards_train/rejected": 0.059417724609375, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.954879760742188, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -5.544404029846191, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00798797607421875, + "rewards_train/margins": 0.0027024270966649055, + "rewards_train/rejected": -0.010690403170883656, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.47842270135879517, + "logps_train/ref_chosen": -0.48828125, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -2.976128578186035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0009858548874035478, + "rewards_train/margins": 0.003286212799139321, + "rewards_train/rejected": -0.002300357911735773, + "step": 76 + }, + { + "epoch": 0.02, + "logps_train/chosen": -5.154629230499268, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -2.8638651371002197, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00016207694716285914, + "rewards_train/margins": 0.0021735907503170893, + "rewards_train/rejected": -0.00201151380315423, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.6663127541542053, + "logps_train/ref_chosen": -0.66796875, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -4.5657548904418945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00016559958748985082, + "rewards_train/margins": 0.0036160887248115614, + "rewards_train/rejected": -0.0034504891373217106, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -127.49488830566406, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -11.80039119720459, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00051116943359375, + "rewards_train/margins": -0.006949711125344038, + "rewards_train/rejected": 0.007460880558937788, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -82.858154296875, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -107.92721557617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03581542894244194, + "rewards_train/margins": -0.0930938720703125, + "rewards_train/rejected": 0.05727844312787056, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -133.98667907714844, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -168.9718017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10133209079504013, + "rewards_train/margins": 0.098512266529724, + "rewards_train/rejected": 0.0028198242653161287, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -99.73900604248047, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -124.1920166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07609939575195312, + "rewards_train/margins": 0.045301055535674095, + "rewards_train/rejected": 0.03079834021627903, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -7.774888038635254, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -46.422691345214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0006361961713992059, + "rewards_train/margins": -0.007094669213984162, + "rewards_train/rejected": 0.0077308653853833675, + "step": 77 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.6355912685394287, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -10.646106719970703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008871627040207386, + "rewards_train/margins": -0.006760955089703202, + "rewards_train/rejected": -0.0021106719505041838, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 1.248e-06, + "loss": 0.6842, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -78.71302795410156, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -54.33632278442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02130279503762722, + "rewards_train/margins": -0.08767051808536053, + "rewards_train/rejected": 0.0663677230477333, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -45.33621597290039, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -65.89505004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04137840494513512, + "rewards_train/margins": 0.03088341001421213, + "rewards_train/rejected": 0.010494994930922985, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -90.00940704345703, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -114.62727355957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09905929863452911, + "rewards_train/margins": 0.11178665515035391, + "rewards_train/rejected": -0.012727356515824795, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -60.93523025512695, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -43.507179260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006476974580436945, + "rewards_train/margins": 0.03219490172341466, + "rewards_train/rejected": -0.025717927142977715, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -8.562445640563965, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -2.0293941497802734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 5.4359438763640355e-06, + "rewards_train/margins": -0.004867649357493065, + "rewards_train/rejected": 0.004873085301369429, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -10.536087036132812, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -11.05576229095459, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02235870435833931, + "rewards_train/margins": -0.048032475635409355, + "rewards_train/rejected": 0.025673771277070045, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.213621139526367, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -23.94497299194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015112114138901234, + "rewards_train/margins": 0.004385185427963734, + "rewards_train/rejected": -0.019497299566864967, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.2272050380706787, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -12.00860595703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011783003807067871, + "rewards_train/margins": -0.010922408080659807, + "rewards_train/rejected": -0.0008605957264080644, + "step": 78 + }, + { + "epoch": 0.02, + "logps_train/chosen": -71.09950256347656, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -69.51693725585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009950256906449795, + "rewards_train/margins": 0.041743469424545765, + "rewards_train/rejected": -0.05169372633099556, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.0161712169647217, + "logps_train/ref_chosen": -1.0625, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -14.110984802246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004632878582924604, + "rewards_train/margins": 0.015731359366327524, + "rewards_train/rejected": -0.01109848078340292, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -11.8707914352417, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -8.643465042114258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012079143896698952, + "rewards_train/margins": -0.010232639615423977, + "rewards_train/rejected": -0.0018465042812749743, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -20.031234741210938, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -24.377851486206055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0031234740745276213, + "rewards_train/margins": -0.015338325640186667, + "rewards_train/rejected": 0.012214851565659046, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.05637788772583, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -6.844277381896973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013112211599946022, + "rewards_train/margins": 0.010039949789643288, + "rewards_train/rejected": 0.0030722618103027344, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.420551300048828, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -27.60154914855957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004555129911750555, + "rewards_train/margins": 0.055599784944206476, + "rewards_train/rejected": -0.06015491485595703, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -84.65607452392578, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -98.53546142578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01560745295137167, + "rewards_train/margins": -0.06206131260842085, + "rewards_train/rejected": 0.04645385965704918, + "step": 79 + }, + { + "epoch": 0.02, + "logps_train/chosen": -125.51773071289062, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.6663818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.001773071358911693, + "rewards_train/margins": 0.014865112607367337, + "rewards_train/rejected": -0.01663818396627903, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 1.28e-06, + "loss": 0.6909, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -53.27809143066406, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -68.21929931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02780914306640625, + "rewards_train/margins": 0.04412078857421875, + "rewards_train/rejected": -0.071929931640625, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.382439613342285, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -2.082042932510376, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013243961147964, + "rewards_train/margins": -0.01597716799005866, + "rewards_train/rejected": 0.00273320684209466, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -134.56211853027344, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -174.09335327148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04378814622759819, + "rewards_train/margins": 0.053123473189771175, + "rewards_train/rejected": -0.009335326962172985, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -13.002082824707031, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -18.8398494720459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.037291716784238815, + "rewards_train/margins": 0.03377666394226253, + "rewards_train/rejected": 0.003515052841976285, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -12.340108871459961, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -13.474814414978027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0027608871459960938, + "rewards_train/margins": 0.0009705543052405119, + "rewards_train/rejected": -0.0037314414512366056, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -90.85984802246094, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -102.6892318725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01401519775390625, + "rewards_train/margins": 0.032938385382294655, + "rewards_train/rejected": -0.018923187628388405, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.247413158416748, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -18.2459774017334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00494618434458971, + "rewards_train/margins": 0.029543924145400524, + "rewards_train/rejected": -0.024597739800810814, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -124.93307495117188, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -90.60862731933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04330749437212944, + "rewards_train/margins": -0.03244476206600666, + "rewards_train/rejected": -0.01086273230612278, + "step": 80 + }, + { + "epoch": 0.02, + "logps_train/chosen": -148.8612518310547, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -136.17510986328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.013874816708266735, + "rewards_train/margins": -0.06861419696360826, + "rewards_train/rejected": 0.082489013671875, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.409436225891113, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -4.107085704803467, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009693622589111328, + "rewards_train/margins": 0.0025774482637643814, + "rewards_train/rejected": -0.01227107085287571, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.3136329650878906, + "logps_train/ref_chosen": -3.375, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -7.518344402313232, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00613670377060771, + "rewards_train/margins": -0.004528856370598078, + "rewards_train/rejected": 0.010665560141205788, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.9137775897979736, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -9.396807670593262, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0023152590729296207, + "rewards_train/margins": -0.012634492013603449, + "rewards_train/rejected": 0.010319232940673828, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -95.63523864746094, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -93.68641662597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013523864559829235, + "rewards_train/margins": 0.055117798037827015, + "rewards_train/rejected": -0.06864166259765625, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -54.75159454345703, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -60.37849807739258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.025159453973174095, + "rewards_train/margins": -0.03730964660644531, + "rewards_train/rejected": 0.012150192633271217, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -56.28094482421875, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -31.169677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02190551720559597, + "rewards_train/margins": 0.026373290922492743, + "rewards_train/rejected": -0.004467773716896772, + "step": 81 + }, + { + "epoch": 0.02, + "logps_train/chosen": -95.9468994140625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -92.71829223632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05531005933880806, + "rewards_train/margins": 0.07713928259909153, + "rewards_train/rejected": -0.02182922326028347, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 1.312e-06, + "loss": 0.6876, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -2.623366355895996, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -1.2123711109161377, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16266337037086487, + "rewards_train/margins": 0.05108798295259476, + "rewards_train/rejected": 0.11157538741827011, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -99.45854187011719, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.17964172363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05414581298828125, + "rewards_train/margins": 0.07210998609662056, + "rewards_train/rejected": -0.01796417310833931, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -177.44525146484375, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -185.52455139160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05547485500574112, + "rewards_train/margins": 0.00792999193072319, + "rewards_train/rejected": 0.04754486307501793, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -69.650146484375, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -107.43819427490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03498535230755806, + "rewards_train/margins": 0.028804779518395662, + "rewards_train/rejected": 0.006180572789162397, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -89.66891479492188, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -128.22691345214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03310852125287056, + "rewards_train/margins": 0.05579986609518528, + "rewards_train/rejected": -0.02269134484231472, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -16.818775177001953, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -67.43992614746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00687751779332757, + "rewards_train/margins": 0.0371150984428823, + "rewards_train/rejected": -0.04399261623620987, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -71.56089782714844, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -74.55175018310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0060897828079760075, + "rewards_train/margins": -0.0009147645905613899, + "rewards_train/rejected": -0.0051750182174146175, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -4.233261585235596, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -4.100033283233643, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011048841290175915, + "rewards_train/margins": 0.011677169648464769, + "rewards_train/rejected": -0.0006283283582888544, + "step": 82 + }, + { + "epoch": 0.02, + "logps_train/chosen": -20.71759796142578, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -14.02125072479248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00925979670137167, + "rewards_train/margins": 0.005365275777876377, + "rewards_train/rejected": -0.014625072479248047, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -99.24072265625, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -158.3504638671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07407226413488388, + "rewards_train/margins": -0.03902587667107582, + "rewards_train/rejected": -0.03504638746380806, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -85.72623443603516, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -81.73944091796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.022623443976044655, + "rewards_train/margins": -0.048679351806640625, + "rewards_train/rejected": 0.02605590783059597, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -94.56642150878906, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -120.78973388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04335784912109375, + "rewards_train/margins": 0.02233123779296875, + "rewards_train/rejected": 0.021026611328125, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -81.00675201416016, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -82.67425537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.049324799329042435, + "rewards_train/margins": 0.11675034090876579, + "rewards_train/rejected": -0.06742554157972336, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -74.21819305419922, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -75.17912292480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.021819306537508965, + "rewards_train/margins": -0.003907013684511185, + "rewards_train/rejected": -0.01791229285299778, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -0.3810037672519684, + "logps_train/ref_chosen": -0.365234375, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -13.415781021118164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0015769392484799027, + "rewards_train/margins": -0.0037488372763618827, + "rewards_train/rejected": 0.00217189802788198, + "step": 83 + }, + { + "epoch": 0.02, + "logps_train/chosen": -133.49679565429688, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -131.53076171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04967956617474556, + "rewards_train/margins": -0.0966033935546875, + "rewards_train/rejected": 0.04692382737994194, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 1.344e-06, + "loss": 0.6867, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -43.31522750854492, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -42.35044860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006522750947624445, + "rewards_train/margins": 0.00352210970595479, + "rewards_train/rejected": -0.010044860653579235, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -20.883848190307617, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -18.99208641052246, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.011615181341767311, + "rewards_train/margins": -0.0016761776059865952, + "rewards_train/rejected": 0.013291358947753906, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -112.88700103759766, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -84.12255859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01129989605396986, + "rewards_train/margins": -0.02644424606114626, + "rewards_train/rejected": 0.03774414211511612, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -21.299970626831055, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -30.235416412353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007502937223762274, + "rewards_train/margins": 0.018544578459113836, + "rewards_train/rejected": -0.011041641235351562, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -78.18220520019531, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -100.70901489257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03177947923541069, + "rewards_train/margins": 0.05268096923828125, + "rewards_train/rejected": -0.02090149000287056, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -99.45148468017578, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -82.1885757446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10485153645277023, + "rewards_train/margins": 0.023709110915660858, + "rewards_train/rejected": 0.08114242553710938, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -106.4594955444336, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -114.16603088378906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.045949555933475494, + "rewards_train/margins": -0.07934646680951118, + "rewards_train/rejected": 0.03339691087603569, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -150.37210083007812, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -119.16770935058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0627899169921875, + "rewards_train/margins": 0.02956085279583931, + "rewards_train/rejected": 0.03322906419634819, + "step": 84 + }, + { + "epoch": 0.02, + "logps_train/chosen": -104.50282287597656, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -115.73200988769531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05028228834271431, + "rewards_train/margins": -0.02708129957318306, + "rewards_train/rejected": -0.02320098876953125, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -119.69019317626953, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -94.97807312011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08098068088293076, + "rewards_train/margins": 0.07878799294121563, + "rewards_train/rejected": 0.0021926879417151213, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -23.10452651977539, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -28.623674392700195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0020473480690270662, + "rewards_train/margins": 0.026914787711575627, + "rewards_train/rejected": -0.02486743964254856, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -18.708520889282227, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -8.29373836517334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029147911816835403, + "rewards_train/margins": 0.008521748706698418, + "rewards_train/rejected": 0.020626163110136986, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -98.48561096191406, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -96.85333251953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0014389038551598787, + "rewards_train/margins": -0.013227844377979636, + "rewards_train/rejected": 0.014666748233139515, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -12.519182205200195, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -2.766814708709717, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008168220520019531, + "rewards_train/margins": -0.00023674964904785156, + "rewards_train/rejected": -0.00793147087097168, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -16.841388702392578, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.555798530578613, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009138870052993298, + "rewards_train/margins": -0.006684016901999712, + "rewards_train/rejected": -0.0024548531509935856, + "step": 85 + }, + { + "epoch": 0.02, + "logps_train/chosen": -46.78715515136719, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -49.2938117980957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02128448523581028, + "rewards_train/margins": 0.0006656646728515625, + "rewards_train/rejected": 0.020618820562958717, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 1.3759999999999998e-06, + "loss": 0.6904, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.1630797386169434, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -20.193675994873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0008795261383056641, + "rewards_train/margins": -0.0047528743743896484, + "rewards_train/rejected": 0.0056324005126953125, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -88.61093139648438, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -117.65127563476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0110931396484375, + "rewards_train/margins": -0.04596557840704918, + "rewards_train/rejected": 0.03487243875861168, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -161.71392822265625, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -177.159912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12860718369483948, + "rewards_train/margins": 0.04459839314222336, + "rewards_train/rejected": 0.08400879055261612, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -98.23802185058594, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -125.44329071044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02380218543112278, + "rewards_train/margins": -0.029473114293068647, + "rewards_train/rejected": 0.0056709288619458675, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -3.303394317626953, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -1.5234375, + "logps_train/rejected": -1.5079087018966675, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005339432042092085, + "rewards_train/margins": -0.006892311852425337, + "rewards_train/rejected": 0.001552879810333252, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -6.2220964431762695, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -6.78965425491333, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018415356054902077, + "rewards_train/margins": 0.009880781173706055, + "rewards_train/rejected": 0.008534574881196022, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -5.397753715515137, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -3.0, + "logps_train/rejected": -2.979672908782959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007099628448486328, + "rewards_train/margins": 0.005066919373348355, + "rewards_train/rejected": 0.002032709075137973, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -9.103280067443848, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -11.266036987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01467199344187975, + "rewards_train/margins": 0.0100256921723485, + "rewards_train/rejected": 0.00464630126953125, + "step": 86 + }, + { + "epoch": 0.02, + "logps_train/chosen": -87.451904296875, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -145.73434448242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04519043117761612, + "rewards_train/margins": 0.0282440185546875, + "rewards_train/rejected": -0.07343444973230362, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -134.5355224609375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -120.27826690673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04644775390625, + "rewards_train/margins": 0.024274444207549095, + "rewards_train/rejected": 0.022173309698700905, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -50.33203887939453, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -46.27180480957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.033203888684511185, + "rewards_train/margins": -0.006023406982421875, + "rewards_train/rejected": -0.02718048170208931, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -1.795931339263916, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -2.609375, + "logps_train/rejected": -2.6280322074890137, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010843134485185146, + "rewards_train/margins": -0.008977413759566844, + "rewards_train/rejected": -0.0018657207256183028, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -147.44876098632812, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -167.45045471191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0448760986328125, + "rewards_train/margins": -0.09983062744140625, + "rewards_train/rejected": 0.05495452880859375, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -115.67755126953125, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -127.86576080322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13224487006664276, + "rewards_train/margins": 0.11882095038890839, + "rewards_train/rejected": 0.013423919677734375, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -136.9163818359375, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -134.6708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00836181640625, + "rewards_train/margins": 0.07545166462659836, + "rewards_train/rejected": -0.06708984822034836, + "step": 87 + }, + { + "epoch": 0.02, + "logps_train/chosen": -17.655963897705078, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -9.804015159606934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015596389770507812, + "rewards_train/margins": -0.01019487390294671, + "rewards_train/rejected": -0.005401515867561102, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 1.408e-06, + "loss": 0.6903, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -14.700756072998047, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -3.4710280895233154, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00492439279332757, + "rewards_train/margins": 0.009839701931923628, + "rewards_train/rejected": -0.004915309138596058, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -103.15937805175781, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -106.89209747314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01593780517578125, + "rewards_train/margins": 0.023271944373846054, + "rewards_train/rejected": -0.039209749549627304, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -23.56465721130371, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -8.810434341430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006034278776496649, + "rewards_train/margins": 0.005827712913742289, + "rewards_train/rejected": 0.00020656586275435984, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -137.46725463867188, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -176.2996826171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04672546312212944, + "rewards_train/margins": -0.01675720140337944, + "rewards_train/rejected": -0.02996826171875, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -98.50802612304688, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -98.45629119873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04919738695025444, + "rewards_train/margins": -0.005173493176698685, + "rewards_train/rejected": 0.054370880126953125, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -77.74276733398438, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -60.42852783203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0242767333984375, + "rewards_train/margins": -0.0064239501953125, + "rewards_train/rejected": -0.017852783203125, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -62.536468505859375, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -37.932823181152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02864685095846653, + "rewards_train/margins": 0.014635467901825905, + "rewards_train/rejected": -0.043282318860292435, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -56.42230224609375, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -70.31964874267578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04223022609949112, + "rewards_train/margins": -0.110265351831913, + "rewards_train/rejected": 0.06803512573242188, + "step": 88 + }, + { + "epoch": 0.02, + "logps_train/chosen": -129.15118408203125, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -145.4754180908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08488159626722336, + "rewards_train/margins": 0.13242340460419655, + "rewards_train/rejected": -0.04754180833697319, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -24.660737991333008, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -7.566998481750488, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008926200680434704, + "rewards_train/margins": -0.009373950771987438, + "rewards_train/rejected": 0.018300151452422142, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -28.0977840423584, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -20.10215950012207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002721595810726285, + "rewards_train/margins": -0.024562454549595714, + "rewards_train/rejected": 0.027284050360322, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -83.60783386230469, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -118.48595428466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08921661227941513, + "rewards_train/margins": 0.137812040746212, + "rewards_train/rejected": -0.048595428466796875, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -112.11298370361328, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -111.2986831665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01129837054759264, + "rewards_train/margins": 0.018569947220385075, + "rewards_train/rejected": -0.029868317767977715, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -13.598470687866211, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -37.69041442871094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.022347068414092064, + "rewards_train/margins": -0.053305625915527344, + "rewards_train/rejected": 0.03095855750143528, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -64.3619613647461, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -8.763877868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013803863897919655, + "rewards_train/margins": 0.02769165113568306, + "rewards_train/rejected": -0.013887787237763405, + "step": 89 + }, + { + "epoch": 0.02, + "logps_train/chosen": -102.372802734375, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -88.27610778808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012719727121293545, + "rewards_train/margins": 0.040330505929887295, + "rewards_train/rejected": -0.02761077880859375, + "step": 89 + }, + { + "epoch": 0.03, + "learning_rate": 1.44e-06, + "loss": 0.6875, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -73.10205078125, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -10.33087158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08979492634534836, + "rewards_train/margins": 0.06663208454847336, + "rewards_train/rejected": 0.023162841796875, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -106.54420471191406, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -92.44585418701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004420471377670765, + "rewards_train/margins": -0.05983505491167307, + "rewards_train/rejected": 0.055414583534002304, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -20.040199279785156, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -20.07038688659668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008480072021484375, + "rewards_train/margins": 0.040518760681152344, + "rewards_train/rejected": -0.03203868865966797, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.141961097717285, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -3.4557976722717285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00794611033052206, + "rewards_train/margins": -0.012366343289613724, + "rewards_train/rejected": 0.004420232959091663, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -121.30325317382812, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -127.17247772216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01967468298971653, + "rewards_train/margins": 0.036922454833984375, + "rewards_train/rejected": -0.017247771844267845, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -8.005626678466797, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -11.791905403137207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.022437667474150658, + "rewards_train/margins": -0.01199712697416544, + "rewards_train/rejected": -0.010440540499985218, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -54.28076171875, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -68.29349517822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0030761719681322575, + "rewards_train/margins": 0.026273346971720457, + "rewards_train/rejected": -0.029349518939852715, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -62.80225372314453, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -50.99115753173828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.030225371941924095, + "rewards_train/margins": -0.031109618779737502, + "rewards_train/rejected": 0.0008842468378134072, + "step": 90 + }, + { + "epoch": 0.03, + "logps_train/chosen": -91.31452941894531, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -127.28251647949219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01854705810546875, + "rewards_train/margins": -0.0032012946903705597, + "rewards_train/rejected": 0.02174835279583931, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.2374861240386963, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -3.364903211593628, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02531111240386963, + "rewards_train/margins": -0.012258291244506836, + "rewards_train/rejected": -0.013052821159362793, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -13.411471366882324, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -15.557806015014648, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016147136688232422, + "rewards_train/margins": -0.010366535279899836, + "rewards_train/rejected": -0.005780601408332586, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -11.003253936767578, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -6.74567985534668, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012825394049286842, + "rewards_train/margins": -0.007007408421486616, + "rewards_train/rejected": -0.005817985627800226, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -100.4729995727539, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -125.38749694824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.052700042724609375, + "rewards_train/margins": 0.09144973754882812, + "rewards_train/rejected": -0.03874969482421875, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -160.83432006835938, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -141.02383422851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01656799390912056, + "rewards_train/margins": 0.01895141671411693, + "rewards_train/rejected": -0.0023834228049963713, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -104.77371215820312, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -122.78938293457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12262878566980362, + "rewards_train/margins": 0.10156707838177681, + "rewards_train/rejected": 0.02106170728802681, + "step": 91 + }, + { + "epoch": 0.03, + "logps_train/chosen": -4.380704879760742, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -1.578125, + "logps_train/rejected": -1.5920308828353882, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01932048797607422, + "rewards_train/margins": -0.017929899622686207, + "rewards_train/rejected": -0.0013905883533880115, + "step": 91 + }, + { + "epoch": 0.03, + "learning_rate": 1.4719999999999998e-06, + "loss": 0.6865, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -11.823328971862793, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -65.16783142089844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.026082897558808327, + "rewards_train/margins": -0.059299757704138756, + "rewards_train/rejected": 0.03321686014533043, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -63.297821044921875, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -70.724609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0202178955078125, + "rewards_train/margins": -0.00732116773724556, + "rewards_train/rejected": 0.02753906324505806, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -9.394380569458008, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -24.713401794433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01681194268167019, + "rewards_train/margins": 0.02565212268382311, + "rewards_train/rejected": -0.00884018000215292, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -56.25389099121094, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -32.696773529052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04961090162396431, + "rewards_train/margins": 0.04428825434297323, + "rewards_train/rejected": 0.005322647280991077, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -73.07339477539062, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -88.11996459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04266052320599556, + "rewards_train/margins": 0.10465698316693306, + "rewards_train/rejected": -0.0619964599609375, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -9.211280822753906, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -3.7492127418518066, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0023780823685228825, + "rewards_train/margins": -0.007144308183342218, + "rewards_train/rejected": 0.004766225814819336, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -26.290599822998047, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -45.103538513183594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016559982672333717, + "rewards_train/margins": -0.031206131912767887, + "rewards_train/rejected": 0.01464614924043417, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -32.130828857421875, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -12.912403106689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06191711500287056, + "rewards_train/margins": 0.07190742623060942, + "rewards_train/rejected": -0.009990311227738857, + "step": 92 + }, + { + "epoch": 0.03, + "logps_train/chosen": -142.27883911132812, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -115.2464599609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02788391150534153, + "rewards_train/margins": -0.0532379150390625, + "rewards_train/rejected": 0.02535400353372097, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -92.89884948730469, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -99.27411651611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01011505164206028, + "rewards_train/margins": 0.037526704370975494, + "rewards_train/rejected": -0.027411652728915215, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -139.17868041992188, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -142.3715362548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08213195949792862, + "rewards_train/margins": 0.11928558722138405, + "rewards_train/rejected": -0.03715362772345543, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.9500806927680969, + "logps_train/ref_chosen": -1.0, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -15.011992454528809, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0049919309094548225, + "rewards_train/margins": -5.8823730796575546e-05, + "rewards_train/rejected": 0.005050754640251398, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -87.69403839111328, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -129.20700073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.030596161261200905, + "rewards_train/margins": 0.051296234130859375, + "rewards_train/rejected": -0.02070007286965847, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.0634435415267944, + "logps_train/ref_chosen": -1.0625, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -17.17653465270996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.435415267944336e-05, + "rewards_train/margins": 0.005059111397713423, + "rewards_train/rejected": -0.005153465550392866, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -5.079185962677002, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -8.460001945495605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017081404104828835, + "rewards_train/margins": 0.01933159865438938, + "rewards_train/rejected": -0.002250194549560547, + "step": 93 + }, + { + "epoch": 0.03, + "logps_train/chosen": -125.63623809814453, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -126.12577056884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01362381037324667, + "rewards_train/margins": 0.04895325098186731, + "rewards_train/rejected": -0.06257706135511398, + "step": 93 + }, + { + "epoch": 0.03, + "learning_rate": 1.504e-06, + "loss": 0.6818, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -128.95867919921875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -142.2357177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004132080357521772, + "rewards_train/margins": 0.027703858446329832, + "rewards_train/rejected": -0.02357177808880806, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -19.23220443725586, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -21.463726043701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0017795562744140625, + "rewards_train/margins": -0.0018478394486010075, + "rewards_train/rejected": 0.00362739572301507, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -118.3102798461914, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -136.82977294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018972015008330345, + "rewards_train/margins": 0.10194930993020535, + "rewards_train/rejected": -0.082977294921875, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -126.40089416503906, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -144.39410400390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04008941724896431, + "rewards_train/margins": -0.00067901611328125, + "rewards_train/rejected": -0.03941040113568306, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -97.20693969726562, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -77.07776641845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02930602990090847, + "rewards_train/margins": 0.03708267165347934, + "rewards_train/rejected": -0.0077766417525708675, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.177738666534424, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -7.577825546264648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004101133439689875, + "rewards_train/margins": 0.02750868769362569, + "rewards_train/rejected": -0.023407554253935814, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -126.06999206542969, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -158.85133361816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006999206729233265, + "rewards_train/margins": -0.0218658447265625, + "rewards_train/rejected": 0.014866637997329235, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.3960633277893066, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -4.672136306762695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009918833151459694, + "rewards_train/margins": -0.01770520256832242, + "rewards_train/rejected": 0.007786369416862726, + "step": 94 + }, + { + "epoch": 0.03, + "logps_train/chosen": -90.91568756103516, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -95.54354858398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00843124371021986, + "rewards_train/margins": 0.012786102015525103, + "rewards_train/rejected": -0.0043548583053052425, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -76.78353881835938, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -113.62486267089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02164611779153347, + "rewards_train/margins": 0.034132384695112705, + "rewards_train/rejected": -0.012486266903579235, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -53.7172966003418, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -83.35981750488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.021729661151766777, + "rewards_train/margins": -0.03574791084975004, + "rewards_train/rejected": 0.014018249697983265, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.085355281829834, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -3.721961498260498, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002285528229549527, + "rewards_train/margins": 0.004285621689632535, + "rewards_train/rejected": -0.006571149919182062, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.8124368786811829, + "logps_train/ref_chosen": -0.76953125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -10.112581253051758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004290563054382801, + "rewards_train/margins": -0.02428243774920702, + "rewards_train/rejected": 0.01999187469482422, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.341681480407715, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -14.720399856567383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018956853076815605, + "rewards_train/margins": 0.015996838686987758, + "rewards_train/rejected": 0.0029600143898278475, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -73.80528259277344, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -72.04827880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01947174035012722, + "rewards_train/margins": 0.024299621116369963, + "rewards_train/rejected": -0.0048278807662427425, + "step": 95 + }, + { + "epoch": 0.03, + "logps_train/chosen": -25.2070255279541, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -25.1827392578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008202552795410156, + "rewards_train/margins": -0.0024286271072924137, + "rewards_train/rejected": -0.0057739256881177425, + "step": 95 + }, + { + "epoch": 0.03, + "learning_rate": 1.536e-06, + "loss": 0.6877, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.731381416320801, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -5.359218597412109, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009075641632080078, + "rewards_train/margins": -0.01065378193743527, + "rewards_train/rejected": 0.0015781403053551912, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.4859704971313477, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -5.163731575012207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006090450566262007, + "rewards_train/margins": 0.009963608114048839, + "rewards_train/rejected": -0.003873157547786832, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.90134859085083, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -12.278226852416992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008884859271347523, + "rewards_train/margins": -0.012312174076214433, + "rewards_train/rejected": 0.00342731480486691, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -88.3575439453125, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -65.79019927978516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.014245606027543545, + "rewards_train/margins": -0.00673446711152792, + "rewards_train/rejected": 0.020980073139071465, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -107.45977783203125, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -122.12704467773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0040222168900072575, + "rewards_train/margins": -0.03327331459149718, + "rewards_train/rejected": 0.03729553148150444, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -147.7197265625, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -168.0179901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02802734449505806, + "rewards_train/margins": 0.029826355748809874, + "rewards_train/rejected": -0.0017990112537518144, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -11.94056510925293, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -6.921706199645996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01905651204288006, + "rewards_train/margins": -0.017510892008431256, + "rewards_train/rejected": -0.0015456200344488025, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -68.4773178100586, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -87.5550765991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10226821899414062, + "rewards_train/margins": 0.15777587890625, + "rewards_train/rejected": -0.055507659912109375, + "step": 96 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.82059383392334, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -10.89188003540039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007059383671730757, + "rewards_train/margins": -0.011621380224823952, + "rewards_train/rejected": 0.004561996553093195, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -15.501100540161133, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -12.974909782409668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006139946170151234, + "rewards_train/margins": 0.02238092478364706, + "rewards_train/rejected": -0.016240978613495827, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -131.1765594482422, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -120.6134262084961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01765594445168972, + "rewards_train/margins": -0.0063133230432868, + "rewards_train/rejected": -0.01134262140840292, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -121.24560546875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -99.67970275878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02456054650247097, + "rewards_train/margins": 0.04340972937643528, + "rewards_train/rejected": -0.06797027587890625, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.452965497970581, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -7.128518104553223, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004671549890190363, + "rewards_train/margins": -0.001194739481434226, + "rewards_train/rejected": -0.003476810408756137, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -136.3042755126953, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -190.76551818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06957244873046875, + "rewards_train/margins": 0.04612426646053791, + "rewards_train/rejected": 0.02344818226993084, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -126.1297607421875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -110.19950103759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06297607719898224, + "rewards_train/margins": 0.006974026560783386, + "rewards_train/rejected": -0.06995010375976562, + "step": 97 + }, + { + "epoch": 0.03, + "logps_train/chosen": -91.14291381835938, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.14244842529297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014291382394731045, + "rewards_train/margins": -4.654005169868469e-05, + "rewards_train/rejected": -0.01424484234303236, + "step": 97 + }, + { + "epoch": 0.03, + "learning_rate": 1.568e-06, + "loss": 0.6866, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -78.62385559082031, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -18.82288932800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03761444240808487, + "rewards_train/margins": 0.03240337502211332, + "rewards_train/rejected": 0.005211067385971546, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -10.614181518554688, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -35.732574462890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005168152041733265, + "rewards_train/margins": -0.031910705380141735, + "rewards_train/rejected": 0.02674255333840847, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -55.05731201171875, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -92.96630096435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06926880031824112, + "rewards_train/margins": 0.015898894518613815, + "rewards_train/rejected": 0.053369905799627304, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -93.79911804199219, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -107.73399353027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02991180494427681, + "rewards_train/margins": -0.00651245191693306, + "rewards_train/rejected": -0.02339935302734375, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.1935102939605713, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -3.670933961868286, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007211470510810614, + "rewards_train/margins": 0.008679866674356163, + "rewards_train/rejected": -0.001468396163545549, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -24.926902770996094, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -9.480445861816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0051902770064771175, + "rewards_train/margins": -0.025895691942423582, + "rewards_train/rejected": 0.020705414935946465, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -77.75933837890625, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -109.09617614746094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02406616322696209, + "rewards_train/margins": -0.06631622649729252, + "rewards_train/rejected": 0.09038238972425461, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -26.89349937438965, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -4.9423699378967285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014349937438964844, + "rewards_train/margins": -0.029487944208085537, + "rewards_train/rejected": 0.015138006769120693, + "step": 98 + }, + { + "epoch": 0.03, + "logps_train/chosen": -72.37616729736328, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -100.09420013427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.062383271753787994, + "rewards_train/margins": 0.07180328574031591, + "rewards_train/rejected": -0.00942001398652792, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -148.7420654296875, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -149.0439453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07420654594898224, + "rewards_train/margins": -0.06981201469898224, + "rewards_train/rejected": -0.00439453125, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -211.251708984375, + "logps_train/ref_chosen": -211.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -177.95327758789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02517089806497097, + "rewards_train/margins": -0.029843139462172985, + "rewards_train/rejected": 0.004672241397202015, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -150.37391662597656, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -107.24055480957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03739166259765625, + "rewards_train/margins": -0.013336181640625, + "rewards_train/rejected": -0.02405548095703125, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -100.7794189453125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -46.81599044799805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02205810509622097, + "rewards_train/margins": 0.028657149989157915, + "rewards_train/rejected": -0.006599044892936945, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -5.896768569946289, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -8.67001724243164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0009481430170126259, + "rewards_train/margins": -0.0008001328096725047, + "rewards_train/rejected": 0.0017482758266851306, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.3224284648895264, + "logps_train/ref_chosen": -3.109375, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -21.05919647216797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.021305346861481667, + "rewards_train/margins": -0.09038569964468479, + "rewards_train/rejected": 0.06908035278320312, + "step": 99 + }, + { + "epoch": 0.03, + "logps_train/chosen": -142.05982971191406, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -85.2242431640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0059829712845385075, + "rewards_train/margins": -0.0335586559958756, + "rewards_train/rejected": 0.02757568471133709, + "step": 99 + }, + { + "epoch": 0.03, + "learning_rate": 1.6e-06, + "loss": 0.7015, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -20.745420455932617, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -6.749210357666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00045795441837981343, + "rewards_train/margins": 0.025378989812452346, + "rewards_train/rejected": -0.024921035394072533, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.8522173166275024, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -11.571210861206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0014970183838158846, + "rewards_train/margins": 0.014868105063214898, + "rewards_train/rejected": -0.013371086679399014, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -13.381340026855469, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -55.614967346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036865998059511185, + "rewards_train/margins": 0.048362732864916325, + "rewards_train/rejected": -0.01149673480540514, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -37.369720458984375, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -35.63304901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03802795335650444, + "rewards_train/margins": 0.05133285466581583, + "rewards_train/rejected": -0.01330490130931139, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -44.038047790527344, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -30.12054443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0038047791458666325, + "rewards_train/margins": 0.008249664213508368, + "rewards_train/rejected": -0.012054443359375, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -5.379714012145996, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -0.6015625, + "logps_train/rejected": -0.6058858633041382, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02140359953045845, + "rewards_train/margins": 0.021835935855051503, + "rewards_train/rejected": -0.00043233632459305227, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -78.94712829589844, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -78.66259765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05528717115521431, + "rewards_train/margins": -0.02845306321978569, + "rewards_train/rejected": 0.083740234375, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.6299309730529785, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -23.399747848510742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01825690269470215, + "rewards_train/margins": 0.020731687545776367, + "rewards_train/rejected": -0.0024747848510742188, + "step": 100 + }, + { + "epoch": 0.03, + "logps_train/chosen": -16.090673446655273, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -13.30252456665039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009067344479262829, + "rewards_train/margins": -0.003814887721091509, + "rewards_train/rejected": -0.00525245675817132, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -69.45667266845703, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -97.9775390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00433273334056139, + "rewards_train/margins": -0.04791336040943861, + "rewards_train/rejected": 0.05224609375, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -78.70162963867188, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -100.96382141113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02016296423971653, + "rewards_train/margins": 0.02621917612850666, + "rewards_train/rejected": -0.04638214036822319, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.2890660762786865, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -1.5, + "logps_train/rejected": -1.547875165939331, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.003906607627868652, + "rewards_train/margins": 0.000880909152328968, + "rewards_train/rejected": -0.00478751678019762, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -65.7902603149414, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -55.36466979980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029026031494140625, + "rewards_train/margins": 0.007440950721502304, + "rewards_train/rejected": -0.03646698221564293, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -19.653003692626953, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -7.9052863121032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009699630551040173, + "rewards_train/margins": 0.012728261761367321, + "rewards_train/rejected": -0.0030286312103271484, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.400094747543335, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -2.46427321434021, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014678025618195534, + "rewards_train/margins": 0.022042847238481045, + "rewards_train/rejected": -0.007364821620285511, + "step": 101 + }, + { + "epoch": 0.03, + "logps_train/chosen": -27.703187942504883, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -29.471599578857422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007818794809281826, + "rewards_train/margins": -0.010658836923539639, + "rewards_train/rejected": 0.0028400421142578125, + "step": 101 + }, + { + "epoch": 0.03, + "learning_rate": 1.6319999999999998e-06, + "loss": 0.6879, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -109.55962371826172, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -122.40020751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005962371826171875, + "rewards_train/margins": 0.034058380872011185, + "rewards_train/rejected": -0.04002075269818306, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -72.51244354248047, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -110.81900787353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.048755645751953125, + "rewards_train/margins": 0.03065643273293972, + "rewards_train/rejected": 0.018099213019013405, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -105.54759216308594, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -131.15960693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0047592162154614925, + "rewards_train/margins": 0.11120147863402963, + "rewards_train/rejected": -0.11596069484949112, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -106.95513916015625, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -118.50152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004486083984375, + "rewards_train/margins": 0.05463867262005806, + "rewards_train/rejected": -0.05015258863568306, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -45.716819763183594, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -2.8469901084899902, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028318023309111595, + "rewards_train/margins": 0.04895453527569771, + "rewards_train/rejected": -0.020636511966586113, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -138.6842041015625, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -141.29872131347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13157959282398224, + "rewards_train/margins": 0.06145172566175461, + "rewards_train/rejected": 0.07012786716222763, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.19532957673072815, + "logps_train/ref_chosen": -0.2060546875, + "logps_train/ref_rejected": -0.2060546875, + "logps_train/rejected": -0.19236841797828674, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.001072511076927185, + "rewards_train/margins": -0.0002961158752441406, + "rewards_train/rejected": 0.0013686269521713257, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -111.55189514160156, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -127.87010192871094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05518951639533043, + "rewards_train/margins": -0.01817932352423668, + "rewards_train/rejected": -0.03701019287109375, + "step": 102 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.5518686771392822, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -4.384410381317139, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06143686920404434, + "rewards_train/margins": -0.016745831817388535, + "rewards_train/rejected": -0.04469103738665581, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -96.6513671875, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -117.12081909179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08486328274011612, + "rewards_train/margins": 0.14694519340991974, + "rewards_train/rejected": -0.06208191066980362, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -80.09617614746094, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -84.832763671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05961761623620987, + "rewards_train/margins": -0.02634124830365181, + "rewards_train/rejected": -0.03327636793255806, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -17.72295570373535, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -33.62894058227539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.034795571118593216, + "rewards_train/margins": -0.07190151512622833, + "rewards_train/rejected": 0.03710594400763512, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -60.78156661987305, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -93.63349914550781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0031566619873046875, + "rewards_train/margins": -0.03980674967169762, + "rewards_train/rejected": 0.03665008768439293, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -74.6659164428711, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -84.46910095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08340835571289062, + "rewards_train/margins": 0.08031845092773438, + "rewards_train/rejected": 0.00308990478515625, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.8100600242614746, + "logps_train/ref_chosen": -1.7578125, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -8.98283863067627, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005224752705544233, + "rewards_train/margins": 0.01805911073461175, + "rewards_train/rejected": -0.023283863440155983, + "step": 103 + }, + { + "epoch": 0.03, + "logps_train/chosen": -40.34494400024414, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -97.84795379638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015505599789321423, + "rewards_train/margins": 0.05030098091810942, + "rewards_train/rejected": -0.034795381128787994, + "step": 103 + }, + { + "epoch": 0.03, + "learning_rate": 1.6639999999999999e-06, + "loss": 0.6795, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.28432437777519226, + "logps_train/ref_chosen": -0.296875, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -4.306674480438232, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.001255062292329967, + "rewards_train/margins": -0.002452489803545177, + "rewards_train/rejected": 0.003707552095875144, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -58.550384521484375, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -89.14617919921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005038452334702015, + "rewards_train/margins": -0.040420533157885075, + "rewards_train/rejected": 0.03538208082318306, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -37.35689163208008, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -23.09956169128418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014310836791992188, + "rewards_train/margins": 0.02426700573414564, + "rewards_train/rejected": -0.009956168942153454, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -11.831047058105469, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -49.51845932006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010645294561982155, + "rewards_train/margins": 0.012491226545535028, + "rewards_train/rejected": -0.0018459319835528731, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -70.6024169921875, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -70.32791900634766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03975830227136612, + "rewards_train/margins": -0.027449801564216614, + "rewards_train/rejected": 0.06720810383558273, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.1609349250793457, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -2.3490006923675537, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00734400749206543, + "rewards_train/margins": 0.006306576658971608, + "rewards_train/rejected": 0.001037430833093822, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -84.14559173583984, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -137.21322631835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014559173956513405, + "rewards_train/margins": 0.006763458251953125, + "rewards_train/rejected": -0.02132263220846653, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -13.241822242736816, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -5.421782970428467, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007067775819450617, + "rewards_train/margins": 0.039871072862297297, + "rewards_train/rejected": -0.03280329704284668, + "step": 104 + }, + { + "epoch": 0.03, + "logps_train/chosen": -25.394424438476562, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -11.338678359985352, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04805755615234375, + "rewards_train/margins": 0.04442539205774665, + "rewards_train/rejected": 0.003632164094597101, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -5.781980514526367, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -0.58203125, + "logps_train/rejected": -1.0593366622924805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02805194817483425, + "rewards_train/margins": 0.07578249089419842, + "rewards_train/rejected": -0.047730542719364166, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.3136484622955322, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -6.179105281829834, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006364846136420965, + "rewards_train/margins": -0.01970431813970208, + "rewards_train/rejected": 0.013339472003281116, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -53.39374542236328, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -61.965850830078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01062545832246542, + "rewards_train/margins": 0.007210541283711791, + "rewards_train/rejected": 0.0034149170387536287, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -91.14816284179688, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -91.87991333007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03518371656537056, + "rewards_train/margins": 0.02317504957318306, + "rewards_train/rejected": 0.0120086669921875, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -107.46688842773438, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -102.96585083007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05331115797162056, + "rewards_train/margins": -0.00010376051068305969, + "rewards_train/rejected": 0.05341491848230362, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -113.57743072509766, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -119.61091613769531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.057743072509765625, + "rewards_train/margins": -0.1466514617204666, + "rewards_train/rejected": 0.08890838921070099, + "step": 105 + }, + { + "epoch": 0.03, + "logps_train/chosen": -137.96595764160156, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -143.07122802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10340423882007599, + "rewards_train/margins": 0.11052704183384776, + "rewards_train/rejected": -0.007122803013771772, + "step": 105 + }, + { + "epoch": 0.03, + "learning_rate": 1.696e-06, + "loss": 0.6897, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -49.07862854003906, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -85.46196746826172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04213714599609375, + "rewards_train/margins": -0.011666107922792435, + "rewards_train/rejected": 0.053803253918886185, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -4.287465572357178, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -4.334192752838135, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0006215572357177734, + "rewards_train/margins": 0.004672718234360218, + "rewards_train/rejected": -0.0052942754700779915, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -23.957571029663086, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -19.157663345336914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.004242897033691406, + "rewards_train/margins": -0.004990768618881702, + "rewards_train/rejected": 0.009233665652573109, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -92.18569946289062, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -101.0397720336914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0185699462890625, + "rewards_train/margins": -0.014592743013054132, + "rewards_train/rejected": -0.0039772032760083675, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -9.663225173950195, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -6.810784339904785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010072517208755016, + "rewards_train/margins": -0.0008690832182765007, + "rewards_train/rejected": -0.009203433990478516, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -57.074951171875, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -104.53083038330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01750488393008709, + "rewards_train/margins": 0.07058792375028133, + "rewards_train/rejected": -0.053083039820194244, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -101.9162826538086, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -80.95588684082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008371734991669655, + "rewards_train/margins": 0.053960418328642845, + "rewards_train/rejected": -0.04558868333697319, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -92.82977294921875, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -101.89581298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01702270470559597, + "rewards_train/margins": 0.006604003719985485, + "rewards_train/rejected": 0.010418700985610485, + "step": 106 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.1536563634872437, + "logps_train/ref_chosen": -1.203125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -5.767541408538818, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004946863744407892, + "rewards_train/margins": 0.019201004412025213, + "rewards_train/rejected": -0.014254140667617321, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -106.58863830566406, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -29.126272201538086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00886383093893528, + "rewards_train/margins": -0.02123661059886217, + "rewards_train/rejected": 0.012372779659926891, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -14.120826721191406, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -18.78035545349121, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005832672119140625, + "rewards_train/margins": -0.0027971267700195312, + "rewards_train/rejected": -0.0030355453491210938, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -165.92051696777344, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -178.27297973632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00794830359518528, + "rewards_train/margins": -0.06475372426211834, + "rewards_train/rejected": 0.07270202785730362, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.759765386581421, + "logps_train/ref_chosen": -1.90625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -5.348722457885742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01464846171438694, + "rewards_train/margins": 0.015145707526244223, + "rewards_train/rejected": -0.0004972458118572831, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.754639625549316, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -16.970924377441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03078603744506836, + "rewards_train/margins": 0.040378475561738014, + "rewards_train/rejected": -0.009592438116669655, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -60.719200134277344, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -11.307866096496582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0030799866653978825, + "rewards_train/margins": 0.0026165963208768517, + "rewards_train/rejected": 0.0004633903445210308, + "step": 107 + }, + { + "epoch": 0.03, + "logps_train/chosen": -256.205078125, + "logps_train/ref_chosen": -256.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -142.8564910888672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0205078125, + "rewards_train/margins": -0.03485870361328125, + "rewards_train/rejected": 0.01435089111328125, + "step": 107 + }, + { + "epoch": 0.03, + "learning_rate": 1.7279999999999998e-06, + "loss": 0.6921, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.3378868103027344, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -2.6619789600372314, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0052738189697265625, + "rewards_train/margins": 0.018346714787185192, + "rewards_train/rejected": -0.01307289581745863, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -82.15835571289062, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -97.59060668945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01583557203412056, + "rewards_train/margins": -0.006774903275072575, + "rewards_train/rejected": -0.009060668759047985, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -44.780155181884766, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -17.960187911987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021984482184052467, + "rewards_train/margins": 0.043003274127840996, + "rewards_train/rejected": -0.02101879194378853, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -90.6578369140625, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -123.3837661743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03421631082892418, + "rewards_train/margins": 0.022592928260564804, + "rewards_train/rejected": 0.011623382568359375, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -111.50511932373047, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -121.0738296508789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09948807209730148, + "rewards_train/margins": 0.10687103727832437, + "rewards_train/rejected": -0.0073829651810228825, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.785757541656494, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -8.12707233428955, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002674245974048972, + "rewards_train/margins": -0.00961852096952498, + "rewards_train/rejected": 0.012292766943573952, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.642243385314941, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -32.84819412231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02047433890402317, + "rewards_train/margins": 0.014345074072480202, + "rewards_train/rejected": -0.03481941297650337, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.4458346366882324, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -1.484375, + "logps_train/rejected": -1.4552561044692993, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003854036331176758, + "rewards_train/margins": 0.0009421466384083033, + "rewards_train/rejected": 0.0029118896927684546, + "step": 108 + }, + { + "epoch": 0.03, + "logps_train/chosen": -85.96731567382812, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -79.76128387451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0532684326171875, + "rewards_train/margins": 0.07939681969583035, + "rewards_train/rejected": -0.026128387078642845, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -137.25567626953125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -111.35993194580078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12556762993335724, + "rewards_train/margins": -0.089574433863163, + "rewards_train/rejected": -0.035993196070194244, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -136.81349182128906, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -139.7615509033203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01865081861615181, + "rewards_train/margins": -0.00519409216940403, + "rewards_train/rejected": 0.02384491078555584, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -117.8402099609375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -97.49111938476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03402099758386612, + "rewards_train/margins": -0.08490905910730362, + "rewards_train/rejected": 0.0508880615234375, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -126.48556518554688, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -134.79702758789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04855651780962944, + "rewards_train/margins": -0.06885376013815403, + "rewards_train/rejected": 0.02029724232852459, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.766935348510742, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -8.268464088439941, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006381034851074219, + "rewards_train/margins": -0.010784626007080078, + "rewards_train/rejected": 0.004403591156005859, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -112.31555938720703, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -138.2527618408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.031555939465761185, + "rewards_train/margins": -0.006279755383729935, + "rewards_train/rejected": -0.02527618408203125, + "step": 109 + }, + { + "epoch": 0.03, + "logps_train/chosen": -90.92607879638672, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -66.13328552246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.057392120361328125, + "rewards_train/margins": 0.020720671862363815, + "rewards_train/rejected": 0.03667144849896431, + "step": 109 + }, + { + "epoch": 0.03, + "learning_rate": 1.7599999999999999e-06, + "loss": 0.6926, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -81.97521209716797, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.223876953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0024787902366369963, + "rewards_train/margins": -0.025133514078333974, + "rewards_train/rejected": 0.02761230431497097, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -5.250389575958252, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -6.844979286193848, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00933604221791029, + "rewards_train/margins": 0.012583970790728927, + "rewards_train/rejected": -0.003247928572818637, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -71.53094482421875, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -85.46194458007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0030944824684411287, + "rewards_train/margins": -0.006900024600327015, + "rewards_train/rejected": 0.003805542131885886, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -84.35231018066406, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -88.97872924804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03523102030158043, + "rewards_train/margins": -0.037358095636591315, + "rewards_train/rejected": 0.002127075335010886, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -8.773763656616211, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -8.117277145385742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008626366034150124, + "rewards_train/margins": 0.003101348876953125, + "rewards_train/rejected": -0.011727714911103249, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -59.48318862915039, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -62.80967712402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026681138202548027, + "rewards_train/margins": 0.032648850698024035, + "rewards_train/rejected": -0.0059677124954760075, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.1175694465637207, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -5.075038433074951, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005430555436760187, + "rewards_train/margins": 0.028559398371726274, + "rewards_train/rejected": -0.023128842934966087, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -32.4190559387207, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -43.71577453613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008094406686723232, + "rewards_train/margins": 0.0046718602534383535, + "rewards_train/rejected": 0.0034225464332848787, + "step": 110 + }, + { + "epoch": 0.03, + "logps_train/chosen": -83.36832427978516, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -103.82388305664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01316757220774889, + "rewards_train/margins": 0.1955558778718114, + "rewards_train/rejected": -0.1823883056640625, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -91.85507202148438, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -91.29877471923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014492797665297985, + "rewards_train/margins": 0.04437027033418417, + "rewards_train/rejected": -0.029877472668886185, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -177.44735717773438, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -228.0, + "logps_train/rejected": -227.32977294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2552642822265625, + "rewards_train/margins": 0.18824157863855362, + "rewards_train/rejected": 0.06702270358800888, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -31.6895751953125, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -91.32843780517578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05645751953125, + "rewards_train/margins": -0.023613739758729935, + "rewards_train/rejected": -0.032843779772520065, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -124.88060760498047, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -126.8946304321289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.038060761988162994, + "rewards_train/margins": -0.14859771728515625, + "rewards_train/rejected": 0.11053695529699326, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -18.732452392578125, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -49.792781829833984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014254760928452015, + "rewards_train/margins": 0.04353294428437948, + "rewards_train/rejected": -0.029278183355927467, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -123.98180389404297, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -127.51730346679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.001819610595703125, + "rewards_train/margins": 0.0035499572986736894, + "rewards_train/rejected": -0.0017303467029705644, + "step": 111 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.03328058496117592, + "logps_train/ref_chosen": -0.035888671875, + "logps_train/ref_rejected": -0.035888671875, + "logps_train/rejected": -0.034054581075906754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0002608087088447064, + "rewards_train/margins": 7.739962893538177e-05, + "rewards_train/rejected": 0.00018340907990932465, + "step": 111 + }, + { + "epoch": 0.03, + "learning_rate": 1.792e-06, + "loss": 0.6845, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -15.6527099609375, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -16.413928985595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0027709961868822575, + "rewards_train/margins": 0.038621903862804174, + "rewards_train/rejected": -0.04139290004968643, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -46.30982208251953, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -36.29595184326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.019017791375517845, + "rewards_train/margins": -0.0013870242983102798, + "rewards_train/rejected": 0.020404815673828125, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -107.538818359375, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -150.5768280029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09611816704273224, + "rewards_train/margins": 0.15380096808075905, + "rewards_train/rejected": -0.05768280103802681, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.280618667602539, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -8.278800010681152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009438133798539639, + "rewards_train/margins": 0.018568134866654873, + "rewards_train/rejected": -0.009130001068115234, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -97.3559799194336, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -132.82106018066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11440201103687286, + "rewards_train/margins": 0.09650802798569202, + "rewards_train/rejected": 0.01789398305118084, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -13.130369186401367, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -5.035290241241455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0005369186401367188, + "rewards_train/margins": 0.006117105484008789, + "rewards_train/rejected": -0.006654024124145508, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -46.60515213012695, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -1.3515625, + "logps_train/rejected": -1.7622114419937134, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03948478773236275, + "rewards_train/margins": 0.0805496834218502, + "rewards_train/rejected": -0.04106489568948746, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -33.35458755493164, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -46.688819885253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010458755306899548, + "rewards_train/margins": -0.016576766967773438, + "rewards_train/rejected": 0.00611801166087389, + "step": 112 + }, + { + "epoch": 0.03, + "logps_train/chosen": -116.6424560546875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -178.00015258789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014245606027543545, + "rewards_train/margins": -0.11423034872859716, + "rewards_train/rejected": 0.09998474270105362, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -83.99896240234375, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -83.7088623046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00010375976853538305, + "rewards_train/margins": -0.029010009762714617, + "rewards_train/rejected": 0.02911376953125, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -59.986968994140625, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -94.01756286621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0013031006092205644, + "rewards_train/margins": -0.046940612024627626, + "rewards_train/rejected": 0.04824371263384819, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.2008543014526367, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -3.7760987281799316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004460430238395929, + "rewards_train/margins": -0.008100557373836637, + "rewards_train/rejected": 0.003640127135440707, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -88.7662353515625, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -142.09109497070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07662353664636612, + "rewards_train/margins": -0.06751403957605362, + "rewards_train/rejected": -0.0091094970703125, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.5282907485961914, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -13.398887634277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0018584252102300525, + "rewards_train/margins": -0.020752810989506543, + "rewards_train/rejected": 0.022611236199736595, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -47.738494873046875, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -48.74118423461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02615051344037056, + "rewards_train/margins": 0.025268936879001558, + "rewards_train/rejected": 0.0008815765613690019, + "step": 113 + }, + { + "epoch": 0.03, + "logps_train/chosen": -137.5759735107422, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -138.09530639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04240265116095543, + "rewards_train/margins": 0.051933291368186474, + "rewards_train/rejected": -0.009530640207231045, + "step": 113 + }, + { + "epoch": 0.03, + "learning_rate": 1.824e-06, + "loss": 0.6882, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -86.46615600585938, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -85.3039321899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05338440090417862, + "rewards_train/margins": 0.08377761952579021, + "rewards_train/rejected": -0.030393218621611595, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -98.60232543945312, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -121.13508605957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13976745307445526, + "rewards_train/margins": 0.10327605903148651, + "rewards_train/rejected": 0.03649139404296875, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.2982072830200195, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -8.353987693786621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013929272070527077, + "rewards_train/margins": 0.011828041402623057, + "rewards_train/rejected": 0.0021012306679040194, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -92.50233459472656, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -137.21261596679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00023345947556663305, + "rewards_train/margins": -0.07897186577611137, + "rewards_train/rejected": 0.07873840630054474, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -15.235536575317383, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -9.19749927520752, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013946342281997204, + "rewards_train/margins": 0.03369627054780722, + "rewards_train/rejected": -0.019749928265810013, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -12.514859199523926, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -54.21984100341797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013985919766128063, + "rewards_train/margins": -0.06700181867927313, + "rewards_train/rejected": 0.053015898913145065, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -108.13495635986328, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -90.42627716064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.036504365503787994, + "rewards_train/margins": -0.02086791768670082, + "rewards_train/rejected": 0.057372283190488815, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -16.273584365844727, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -4.243881702423096, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0023584365844726562, + "rewards_train/margins": 0.00015473365783691406, + "rewards_train/rejected": -0.0025131702423095703, + "step": 114 + }, + { + "epoch": 0.03, + "logps_train/chosen": -81.05502319335938, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -70.47885131835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0055023194290697575, + "rewards_train/margins": -0.007617187686264515, + "rewards_train/rejected": 0.0021148682571947575, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -11.45356559753418, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -4.03994083404541, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01714344136416912, + "rewards_train/margins": 0.008637524209916592, + "rewards_train/rejected": 0.00850591715425253, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -130.4913330078125, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -134.11712646484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04913330078125, + "rewards_train/margins": -0.037420653738081455, + "rewards_train/rejected": -0.011712647043168545, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -119.79087829589844, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -86.98353576660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02908783033490181, + "rewards_train/margins": -0.030734253698028624, + "rewards_train/rejected": 0.0016464233631268144, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -192.73475646972656, + "logps_train/ref_chosen": -192.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -168.32809448242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07347565144300461, + "rewards_train/margins": -0.04066620394587517, + "rewards_train/rejected": -0.03280944749712944, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -16.434980392456055, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -20.688655853271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01900196075439453, + "rewards_train/margins": 0.0003675464540719986, + "rewards_train/rejected": 0.018634414300322533, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -109.9364013671875, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -93.17825317382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05635986477136612, + "rewards_train/margins": -0.02581481635570526, + "rewards_train/rejected": 0.08217468112707138, + "step": 115 + }, + { + "epoch": 0.03, + "logps_train/chosen": -149.35012817382812, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -119.36205291748047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03501281887292862, + "rewards_train/margins": -0.04880752693861723, + "rewards_train/rejected": 0.01379470806568861, + "step": 115 + }, + { + "epoch": 0.03, + "learning_rate": 1.856e-06, + "loss": 0.6976, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -16.386295318603516, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -4.092465400695801, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03637046739459038, + "rewards_train/margins": 0.02686700690537691, + "rewards_train/rejected": 0.009503460489213467, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.6804276704788208, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -1.2109375, + "logps_train/rejected": -1.2453938722610474, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0008552670478820801, + "rewards_train/margins": 0.0025903701316565275, + "rewards_train/rejected": -0.0034456371795386076, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -4.251415729522705, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -4.025405406951904, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10795407742261887, + "rewards_train/margins": -0.10853853676235303, + "rewards_train/rejected": 0.0005844593397341669, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -70.73371887207031, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -121.41362762451172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07337188720703125, + "rewards_train/margins": -0.032009124755859375, + "rewards_train/rejected": -0.041362762451171875, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -111.92704010009766, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -132.3380584716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.042704012244939804, + "rewards_train/margins": -0.008898165076971054, + "rewards_train/rejected": -0.03380584716796875, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -77.01219177246094, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -115.26158905029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0012191772693768144, + "rewards_train/margins": 0.02493972738739103, + "rewards_train/rejected": -0.026158904656767845, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -29.980087280273438, + "logps_train/ref_chosen": -29.875, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -42.83481216430664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010508728213608265, + "rewards_train/margins": -0.027027512900531292, + "rewards_train/rejected": 0.016518784686923027, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -1.4669365882873535, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -10.160274505615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0017438412178307772, + "rewards_train/margins": 0.017771291779354215, + "rewards_train/rejected": -0.016027450561523438, + "step": 116 + }, + { + "epoch": 0.03, + "logps_train/chosen": -28.898805618286133, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -51.14210510253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014880562201142311, + "rewards_train/margins": -0.000670052133500576, + "rewards_train/rejected": -0.014210510067641735, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -27.252155303955078, + "logps_train/ref_chosen": -27.25, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -16.652610778808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0002155303955078125, + "rewards_train/margins": 0.05254554748535156, + "rewards_train/rejected": -0.052761077880859375, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -106.79823303222656, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -89.03179931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07017669826745987, + "rewards_train/margins": 0.02335662767291069, + "rewards_train/rejected": 0.04682007059454918, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -122.23757934570312, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -108.22823333740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07624206691980362, + "rewards_train/margins": 0.14906540513038635, + "rewards_train/rejected": -0.07282333821058273, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -11.790824890136719, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -4.434722900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014667510986328125, + "rewards_train/margins": 0.02376480121165514, + "rewards_train/rejected": -0.009097290225327015, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -8.904218673706055, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.601733684539795, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0029218674171715975, + "rewards_train/margins": 0.029126503271982074, + "rewards_train/rejected": -0.03204837068915367, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -9.358426094055176, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -87.45345306396484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004592609591782093, + "rewards_train/margins": -0.00924730347469449, + "rewards_train/rejected": 0.004654693882912397, + "step": 117 + }, + { + "epoch": 0.03, + "logps_train/chosen": -4.56505823135376, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -4.924371242523193, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00025582313537597656, + "rewards_train/margins": 0.0015563011402264237, + "rewards_train/rejected": -0.0018121242756024003, + "step": 117 + }, + { + "epoch": 0.03, + "learning_rate": 1.8879999999999998e-06, + "loss": 0.6877, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -64.00302124023438, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -67.44975280761719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0496978759765625, + "rewards_train/margins": -0.005326844751834869, + "rewards_train/rejected": 0.05502472072839737, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -61.29009246826172, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -98.93931579589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.045990753918886185, + "rewards_train/margins": 0.03992233332246542, + "rewards_train/rejected": 0.006068420596420765, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -126.78966522216797, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -122.27586364746094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.021033478900790215, + "rewards_train/margins": -0.0013801567256450653, + "rewards_train/rejected": 0.02241363562643528, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -43.34436798095703, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -18.367000579833984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00943679828196764, + "rewards_train/margins": 0.0022632600739598274, + "rewards_train/rejected": -0.011700058355927467, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -96.08064270019531, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -112.20047760009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04193573072552681, + "rewards_train/margins": 0.011983489617705345, + "rewards_train/rejected": 0.029952241107821465, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -13.49219036102295, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -105.17622375488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.030469035729765892, + "rewards_train/margins": -0.11284666322171688, + "rewards_train/rejected": 0.08237762749195099, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -97.06982421875, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -131.05535888671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0069824219681322575, + "rewards_train/margins": -0.001446533016860485, + "rewards_train/rejected": -0.005535888951271772, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -145.6407012939453, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -106.24215698242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06407012790441513, + "rewards_train/margins": -0.08985443040728569, + "rewards_train/rejected": 0.02578430250287056, + "step": 118 + }, + { + "epoch": 0.03, + "logps_train/chosen": -4.30404806137085, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -13.763578414916992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00852980650961399, + "rewards_train/margins": -0.007171964971348643, + "rewards_train/rejected": -0.0013578415382653475, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -111.3284912109375, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -103.36653137207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01715087890625, + "rewards_train/margins": 0.003804015927016735, + "rewards_train/rejected": 0.013346862979233265, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -121.11868286132812, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -124.25729370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0381317138671875, + "rewards_train/margins": 0.01386108435690403, + "rewards_train/rejected": 0.02427062951028347, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -131.57342529296875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -160.20941162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057342529296875, + "rewards_train/margins": 0.0635986328125, + "rewards_train/rejected": -0.120941162109375, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -65.143310546875, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -52.16156768798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014331054873764515, + "rewards_train/margins": 0.0018257135525345802, + "rewards_train/rejected": -0.016156768426299095, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.6758315563201904, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -5.286080837249756, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012895655818283558, + "rewards_train/margins": -0.003037571907043457, + "rewards_train/rejected": -0.0098580839112401, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.489116668701172, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -13.869142532348633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0035991668701171875, + "rewards_train/margins": -0.010434913914650679, + "rewards_train/rejected": 0.006835747044533491, + "step": 119 + }, + { + "epoch": 0.03, + "logps_train/chosen": -23.055404663085938, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -34.00819778442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0069595337845385075, + "rewards_train/margins": -0.01722068851813674, + "rewards_train/rejected": 0.024180222302675247, + "step": 119 + }, + { + "epoch": 0.03, + "learning_rate": 1.92e-06, + "loss": 0.6974, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -129.03717041015625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -134.80458068847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0037170411087572575, + "rewards_train/margins": 0.07674102624878287, + "rewards_train/rejected": -0.08045806735754013, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.516416549682617, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -18.076248168945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014141655527055264, + "rewards_train/margins": -0.006516838446259499, + "rewards_train/rejected": -0.007624817080795765, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -50.212890625, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -46.962921142578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02871093712747097, + "rewards_train/margins": -0.02499694935977459, + "rewards_train/rejected": 0.05370788648724556, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -19.733125686645508, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -20.661243438720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02331256866455078, + "rewards_train/margins": 0.01781177520751953, + "rewards_train/rejected": -0.04112434387207031, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -106.95999145507812, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -115.56536102294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004000854678452015, + "rewards_train/margins": 0.06053695920854807, + "rewards_train/rejected": -0.056536104530096054, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -22.64592742919922, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -53.95127868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.035407256335020065, + "rewards_train/margins": 0.13053512945771217, + "rewards_train/rejected": -0.09512787312269211, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -139.13153076171875, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -100.67597198486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013153076171875, + "rewards_train/margins": 0.05444412678480148, + "rewards_train/rejected": -0.06759720295667648, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -5.1851487159729, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -32.236175537109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009139872156083584, + "rewards_train/margins": -0.01052231842186302, + "rewards_train/rejected": 0.0013824462657794356, + "step": 120 + }, + { + "epoch": 0.03, + "logps_train/chosen": -69.76941680908203, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -133.4881591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07305832207202911, + "rewards_train/margins": 0.22187423706054688, + "rewards_train/rejected": -0.14881591498851776, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -6.488013744354248, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -1.640625, + "logps_train/rejected": -2.007953405380249, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11682362854480743, + "rewards_train/margins": 0.1535564698278904, + "rewards_train/rejected": -0.03673284128308296, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -83.78781127929688, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -92.3939437866211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07121887058019638, + "rewards_train/margins": 0.06061324942857027, + "rewards_train/rejected": 0.01060562115162611, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -92.52505493164062, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -96.1610107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04749450832605362, + "rewards_train/margins": 0.06359558366239071, + "rewards_train/rejected": -0.01610107533633709, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -16.294279098510742, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -21.897964477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004427909851074219, + "rewards_train/margins": 0.02286853827536106, + "rewards_train/rejected": -0.02729644812643528, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.2614106833934784, + "logps_train/ref_chosen": -0.275390625, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -2.6799888610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0013979942305013537, + "rewards_train/margins": 0.014709380338899791, + "rewards_train/rejected": -0.013311386108398438, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -9.98371410369873, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -0.359375, + "logps_train/rejected": -1.235856294631958, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10787858814001083, + "rewards_train/margins": 0.19552671909332275, + "rewards_train/rejected": -0.08764813095331192, + "step": 121 + }, + { + "epoch": 0.03, + "logps_train/chosen": -54.740413665771484, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -58.80176544189453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05095863342285156, + "rewards_train/margins": -0.01886482536792755, + "rewards_train/rejected": 0.06982345879077911, + "step": 121 + }, + { + "epoch": 0.03, + "learning_rate": 1.9519999999999997e-06, + "loss": 0.6627, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -10.842046737670898, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -7.133126258850098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028295326977968216, + "rewards_train/margins": 0.04785795323550701, + "rewards_train/rejected": -0.019562626257538795, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.6502888202667236, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -0.71875, + "logps_train/rejected": -0.7288691401481628, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0009663820383138955, + "rewards_train/margins": 4.553195321932435e-05, + "rewards_train/rejected": -0.0010119139915332198, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -18.240116119384766, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -11.783635139465332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013488388620316982, + "rewards_train/margins": 0.0043519027531147, + "rewards_train/rejected": 0.009136485867202282, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -3.22232723236084, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -13.42538070678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004329776857048273, + "rewards_train/margins": 0.02186784753575921, + "rewards_train/rejected": -0.017538070678710938, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -79.93558502197266, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -124.81822204589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0064414977096021175, + "rewards_train/margins": 0.13826371124014258, + "rewards_train/rejected": -0.13182221353054047, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.533301830291748, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -14.131625175476074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01229481678456068, + "rewards_train/margins": 0.025457334704697132, + "rewards_train/rejected": -0.013162517920136452, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -43.2984619140625, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -4.7407426834106445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07015381008386612, + "rewards_train/margins": 0.0911030787974596, + "rewards_train/rejected": -0.020949268713593483, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -81.71807098388672, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -91.43550109863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07819290459156036, + "rewards_train/margins": 0.1717430129647255, + "rewards_train/rejected": -0.09355010837316513, + "step": 122 + }, + { + "epoch": 0.03, + "logps_train/chosen": -8.55490779876709, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -15.357670783996582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007592201582156122, + "rewards_train/margins": 0.042776300047989935, + "rewards_train/rejected": -0.04201707988977432, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -78.20518493652344, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -116.21354675292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02051849476993084, + "rewards_train/margins": 0.0008361805230379105, + "rewards_train/rejected": -0.02135467529296875, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -44.295711517333984, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -76.45841979980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004571151919662952, + "rewards_train/margins": -0.008729171939194202, + "rewards_train/rejected": 0.00415802001953125, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -64.12860870361328, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -60.96831512451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01286087092012167, + "rewards_train/margins": 0.033970643766224384, + "rewards_train/rejected": -0.046831514686346054, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -7.680639743804932, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -21.358192443847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016311025246977806, + "rewards_train/margins": 0.027130269445478916, + "rewards_train/rejected": -0.01081924419850111, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -65.07637786865234, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -18.845989227294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.042362213134765625, + "rewards_train/margins": 0.06446113623678684, + "rewards_train/rejected": -0.022098923102021217, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -0.9914988279342651, + "logps_train/ref_chosen": -0.95703125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -8.368237495422363, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0034467577934265137, + "rewards_train/margins": -0.004123008286114782, + "rewards_train/rejected": 0.0006762504926882684, + "step": 123 + }, + { + "epoch": 0.03, + "logps_train/chosen": -21.23199462890625, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -5.154926300048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02319946326315403, + "rewards_train/margins": -0.013956832699477673, + "rewards_train/rejected": -0.009242630563676357, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 1.984e-06, + "loss": 0.6731, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -51.31226348876953, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -57.838218688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006226349156349897, + "rewards_train/margins": 0.0025955201126635075, + "rewards_train/rejected": -0.008821869269013405, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -19.223407745361328, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -30.009315490722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027659226208925247, + "rewards_train/margins": 0.003590775653719902, + "rewards_train/rejected": 0.024068450555205345, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -41.08605194091797, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -62.795326232910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00860519427806139, + "rewards_train/margins": 0.09592742752283812, + "rewards_train/rejected": -0.1045326218008995, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -100.38288879394531, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -135.16456604003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01171112060546875, + "rewards_train/margins": 0.028167724609375, + "rewards_train/rejected": -0.01645660400390625, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -8.208161354064941, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -13.455031394958496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0020661354064941406, + "rewards_train/margins": 0.012187004089355469, + "rewards_train/rejected": -0.01425313949584961, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -94.21056365966797, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -120.61825561523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028943633660674095, + "rewards_train/margins": 0.040769195184111595, + "rewards_train/rejected": -0.0118255615234375, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -108.96918487548828, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -141.76608276367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0030815124046057463, + "rewards_train/margins": -0.020310211228206754, + "rewards_train/rejected": 0.0233917236328125, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -75.66943359375, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -74.51016998291016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08305664360523224, + "rewards_train/margins": -0.06592635810375214, + "rewards_train/rejected": 0.14898300170898438, + "step": 124 + }, + { + "epoch": 0.03, + "logps_train/chosen": -78.64228057861328, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -109.83079528808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.035771943628787994, + "rewards_train/margins": 0.018851472064852715, + "rewards_train/rejected": 0.01692047156393528, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -104.7662124633789, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -155.95098876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07662124931812286, + "rewards_train/margins": 0.018477626144886017, + "rewards_train/rejected": -0.09509887546300888, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -144.18731689453125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -165.54119873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.081268310546875, + "rewards_train/margins": 0.03538818284869194, + "rewards_train/rejected": 0.04588012769818306, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -2.2611324787139893, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -3.996076822280884, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0020117522217333317, + "rewards_train/margins": 0.012556934263557196, + "rewards_train/rejected": -0.010545182041823864, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -29.881149291992188, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -17.4056396484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03811493143439293, + "rewards_train/margins": -0.0350509665440768, + "rewards_train/rejected": -0.0030639648903161287, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -82.56784057617188, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -68.91403198242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04321594163775444, + "rewards_train/margins": 0.03461913950741291, + "rewards_train/rejected": 0.00859680213034153, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -112.6017074584961, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -118.7659912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01017074566334486, + "rewards_train/margins": 0.1164283724501729, + "rewards_train/rejected": -0.12659911811351776, + "step": 125 + }, + { + "epoch": 0.03, + "logps_train/chosen": -49.69144821166992, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -82.5357894897461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019144821912050247, + "rewards_train/margins": 0.03443412855267525, + "rewards_train/rejected": -0.053578950464725494, + "step": 125 + }, + { + "epoch": 0.04, + "learning_rate": 1.9999991251321473e-06, + "loss": 0.683, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -84.21720123291016, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -104.0073471069336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.021720124408602715, + "rewards_train/margins": -0.07098541595041752, + "rewards_train/rejected": 0.049265291541814804, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.3892159461975098, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -1.8828125, + "logps_train/rejected": -1.9910262823104858, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0017034054035320878, + "rewards_train/margins": 0.012524784193374217, + "rewards_train/rejected": -0.010821378789842129, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -68.63739013671875, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -79.02694702148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01373901404440403, + "rewards_train/margins": -0.011044311802834272, + "rewards_train/rejected": -0.0026947022415697575, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -150.78038024902344, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -111.02252197265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07803802937269211, + "rewards_train/margins": -0.07578583201393485, + "rewards_train/rejected": -0.0022521973587572575, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -20.06241798400879, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -39.44000244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018741799518465996, + "rewards_train/margins": 0.00025844573974609375, + "rewards_train/rejected": -0.01900024525821209, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -9.790735244750977, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -3.8957865238189697, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008426475338637829, + "rewards_train/margins": 0.021442627534270287, + "rewards_train/rejected": -0.013016152195632458, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.066198348999023, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -4.001808166503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.000369834917364642, + "rewards_train/margins": 0.021685982850613073, + "rewards_train/rejected": -0.022055817767977715, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -110.87167358398438, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -126.20277404785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03716735914349556, + "rewards_train/margins": -0.06688995473086834, + "rewards_train/rejected": 0.02972259558737278, + "step": 126 + }, + { + "epoch": 0.04, + "logps_train/chosen": -9.3307523727417, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -6.043102264404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02942476235330105, + "rewards_train/margins": 0.043109988793730736, + "rewards_train/rejected": -0.013685226440429688, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -66.87750244140625, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -12.138631820678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06224975734949112, + "rewards_train/margins": 0.08861294016242027, + "rewards_train/rejected": -0.026363182812929153, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.312475204467773, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -17.492034912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006252479739487171, + "rewards_train/margins": 0.03045597206801176, + "rewards_train/rejected": -0.02420349232852459, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -170.67556762695312, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -209.0, + "logps_train/rejected": -209.96688842773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03244323655962944, + "rewards_train/margins": 0.12913208082318306, + "rewards_train/rejected": -0.09668884426355362, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -125.03641510009766, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -129.30728149414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1536415070295334, + "rewards_train/margins": -0.12291335687041283, + "rewards_train/rejected": -0.03072815015912056, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -18.40949821472168, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -4.902632236480713, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009050178341567516, + "rewards_train/margins": 0.01806340180337429, + "rewards_train/rejected": -0.009013223461806774, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -95.56779479980469, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -145.12860107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04322052001953125, + "rewards_train/margins": 0.15608062595129013, + "rewards_train/rejected": -0.11286010593175888, + "step": 127 + }, + { + "epoch": 0.04, + "logps_train/chosen": -89.7550048828125, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -192.0325927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02550048939883709, + "rewards_train/margins": 0.17775878496468067, + "rewards_train/rejected": -0.20325927436351776, + "step": 127 + }, + { + "epoch": 0.04, + "learning_rate": 1.9999921261985114e-06, + "loss": 0.6835, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -30.280433654785156, + "logps_train/ref_chosen": -30.25, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -22.930593490600586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0030433654319494963, + "rewards_train/margins": -0.04748401860706508, + "rewards_train/rejected": 0.044440653175115585, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -60.91107177734375, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -70.64047241210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03389282152056694, + "rewards_train/margins": -0.0020599365234375, + "rewards_train/rejected": 0.03595275804400444, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.756033897399902, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -11.178102493286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018146609887480736, + "rewards_train/margins": 0.03595685958862305, + "rewards_train/rejected": -0.01781024970114231, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.404312133789062, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -14.331414222717285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0029312134720385075, + "rewards_train/margins": 0.023960209917277098, + "rewards_train/rejected": -0.026891423389315605, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -135.26510620117188, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -117.28167724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07348938286304474, + "rewards_train/margins": 0.10165710747241974, + "rewards_train/rejected": -0.028167724609375, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -177.6179656982422, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -143.43560791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16179656982421875, + "rewards_train/margins": 0.08176422119140625, + "rewards_train/rejected": -0.243560791015625, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -54.794677734375, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -76.05717468261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07053222507238388, + "rewards_train/margins": 0.02624969184398651, + "rewards_train/rejected": 0.04428253322839737, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -19.24261474609375, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -5.4843525886535645, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013238525949418545, + "rewards_train/margins": 0.04604878555983305, + "rewards_train/rejected": -0.032810259610414505, + "step": 128 + }, + { + "epoch": 0.04, + "logps_train/chosen": -13.417701721191406, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -7.378868103027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01052017230540514, + "rewards_train/margins": -0.013258361956104636, + "rewards_train/rejected": 0.0027381896506994963, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.942180633544922, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -18.662418365478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.030781937763094902, + "rewards_train/margins": 0.02202377375215292, + "rewards_train/rejected": 0.008758164010941982, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -17.682371139526367, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -29.214065551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03176288679242134, + "rewards_train/margins": 0.06566944345831871, + "rewards_train/rejected": -0.03390655666589737, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.208397626876831, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -31.337284088134766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003652262734249234, + "rewards_train/margins": -0.04492385615594685, + "rewards_train/rejected": 0.04127159342169762, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -6.507226467132568, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -20.580339431762695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002402353333309293, + "rewards_train/margins": -0.014563704608008265, + "rewards_train/rejected": 0.01696605794131756, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -88.93971252441406, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -114.07327270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05602874979376793, + "rewards_train/margins": 0.06335602048784494, + "rewards_train/rejected": -0.007327270694077015, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.5062038898468018, + "logps_train/ref_chosen": -1.390625, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -22.188125610351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01155788917094469, + "rewards_train/margins": 0.007254672236740589, + "rewards_train/rejected": -0.01881256140768528, + "step": 129 + }, + { + "epoch": 0.04, + "logps_train/chosen": -42.46715545654297, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -70.52401733398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.046715546399354935, + "rewards_train/margins": -0.04431381286121905, + "rewards_train/rejected": -0.002401733538135886, + "step": 129 + }, + { + "epoch": 0.04, + "learning_rate": 1.999978128380225e-06, + "loss": 0.684, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -186.791015625, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -113.77272033691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0791015625, + "rewards_train/margins": 0.04817047715187073, + "rewards_train/rejected": -0.12727203965187073, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -13.931400299072266, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -11.907501220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0056400299072265625, + "rewards_train/margins": 0.03511009365320206, + "rewards_train/rejected": -0.04075012356042862, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.086698293685913, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -1.859375, + "logps_train/rejected": -1.7947232723236084, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0008573294035159051, + "rewards_train/margins": -0.007322502264287323, + "rewards_train/rejected": 0.006465172860771418, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.597979545593262, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -8.250271797180176, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.012922954745590687, + "rewards_train/margins": -0.006645774934440851, + "rewards_train/rejected": -0.006277179811149836, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -18.68020248413086, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -3.5932319164276123, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.031979750841856, + "rewards_train/margins": 0.02411544229835272, + "rewards_train/rejected": 0.007864308543503284, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -129.31954956054688, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -111.6068115234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03195495530962944, + "rewards_train/margins": -0.0712738037109375, + "rewards_train/rejected": 0.03931884840130806, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -37.725685119628906, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -11.754480361938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027431488037109375, + "rewards_train/margins": 0.02787952424841933, + "rewards_train/rejected": -0.0004480362113099545, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -157.6998748779297, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -94.45804595947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13001251220703125, + "rewards_train/margins": 0.12581710796803236, + "rewards_train/rejected": 0.00419540423899889, + "step": 130 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.785794258117676, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -11.988253593444824, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016079425811767578, + "rewards_train/margins": -0.01725406653713435, + "rewards_train/rejected": 0.0011746407253667712, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.3499908447265625, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -3.122424602508545, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00687408447265625, + "rewards_train/margins": -0.004006624221801758, + "rewards_train/rejected": -0.002867460250854492, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -85.5531005859375, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -99.16548156738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04468994215130806, + "rewards_train/margins": 0.16123810037970543, + "rewards_train/rejected": -0.11654815822839737, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -124.69781494140625, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -121.25458526611328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08021850883960724, + "rewards_train/margins": -0.09432296454906464, + "rewards_train/rejected": 0.17454147338867188, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -59.099613189697266, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -32.353782653808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04003868252038956, + "rewards_train/margins": 0.07541694864630699, + "rewards_train/rejected": -0.035378266125917435, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -68.79859924316406, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -100.80708312988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02014007605612278, + "rewards_train/margins": 0.15084839798510075, + "rewards_train/rejected": -0.13070832192897797, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -88.87017822265625, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -108.31890869140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06298217922449112, + "rewards_train/margins": 0.09487304836511612, + "rewards_train/rejected": -0.031890869140625, + "step": 131 + }, + { + "epoch": 0.04, + "logps_train/chosen": -74.76791381835938, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -82.54435729980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0232086181640625, + "rewards_train/margins": 0.027644348330795765, + "rewards_train/rejected": -0.004435730166733265, + "step": 131 + }, + { + "epoch": 0.04, + "learning_rate": 1.9999571317752566e-06, + "loss": 0.6761, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -79.09050750732422, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -121.22200012207031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.059050749987363815, + "rewards_train/margins": -0.036850737407803535, + "rewards_train/rejected": -0.02220001257956028, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -19.957305908203125, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -12.888566970825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00823059119284153, + "rewards_train/margins": 0.0056261057034134865, + "rewards_train/rejected": -0.013856696896255016, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -93.14344787597656, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -119.66064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1356552094221115, + "rewards_train/margins": 0.10171966254711151, + "rewards_train/rejected": 0.033935546875, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -13.79690170288086, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -22.49055290222168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026559829711914062, + "rewards_train/margins": 0.013115120120346546, + "rewards_train/rejected": 0.013444709591567516, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -114.3011703491211, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -91.62004852294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019882965832948685, + "rewards_train/margins": 0.13188781961798668, + "rewards_train/rejected": -0.112004853785038, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -23.202877044677734, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -26.362159729003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05471229553222656, + "rewards_train/margins": 0.003428269177675247, + "rewards_train/rejected": 0.051284026354551315, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -52.681251525878906, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -25.04136848449707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.056874848902225494, + "rewards_train/margins": 0.048511696979403496, + "rewards_train/rejected": 0.008363151922821999, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -172.27566528320312, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -169.281005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02756652794778347, + "rewards_train/margins": 0.10053406096994877, + "rewards_train/rejected": -0.12810058891773224, + "step": 132 + }, + { + "epoch": 0.04, + "logps_train/chosen": -64.48091125488281, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -85.76319885253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04809112474322319, + "rewards_train/margins": -0.02177123911678791, + "rewards_train/rejected": -0.02631988562643528, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.755558013916016, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.616806983947754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024444198235869408, + "rewards_train/margins": 0.03299989644438028, + "rewards_train/rejected": -0.008555698208510876, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.834624767303467, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -1.2890625, + "logps_train/rejected": -2.553701162338257, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05403752252459526, + "rewards_train/margins": 0.18050139769911766, + "rewards_train/rejected": -0.1264638751745224, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.063098669052124, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -4.384658336639404, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006190133281052113, + "rewards_train/margins": -0.005344033241271973, + "rewards_train/rejected": 0.011534166522324085, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -51.31547927856445, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -39.177791595458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018452072516083717, + "rewards_train/margins": 0.011231231968849897, + "rewards_train/rejected": 0.00722084054723382, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.933657646179199, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -16.36673355102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00038423537625931203, + "rewards_train/margins": 0.012057590851327404, + "rewards_train/rejected": -0.011673355475068092, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -109.71058654785156, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -97.45988464355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07894134521484375, + "rewards_train/margins": 0.07492980966344476, + "rewards_train/rejected": 0.0040115355513989925, + "step": 133 + }, + { + "epoch": 0.04, + "logps_train/chosen": -5.958868980407715, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -6.517080307006836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.020886898040771484, + "rewards_train/margins": -0.016053867060691118, + "rewards_train/rejected": -0.004833030980080366, + "step": 133 + }, + { + "epoch": 0.04, + "learning_rate": 1.999929136530562e-06, + "loss": 0.6739, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -82.87535858154297, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -103.42523193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.037535857409238815, + "rewards_train/margins": 0.004987336695194244, + "rewards_train/rejected": -0.04252319410443306, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -67.2639389038086, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -51.98517990112305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026393890380859375, + "rewards_train/margins": 0.022124100476503372, + "rewards_train/rejected": -0.04851799085736275, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.4235646724700928, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -11.472005844116211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015793967992067337, + "rewards_train/margins": -0.024843383580446243, + "rewards_train/rejected": 0.009049415588378906, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -87.39812469482422, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -70.66063690185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11018753051757812, + "rewards_train/margins": 0.07625121995806694, + "rewards_train/rejected": 0.033936310559511185, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -110.36100769042969, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -149.99630737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03610077127814293, + "rewards_train/margins": 0.06352996453642845, + "rewards_train/rejected": -0.09963073581457138, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -60.51698303222656, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -60.49052429199219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.12330169975757599, + "rewards_train/margins": -0.0026458799839019775, + "rewards_train/rejected": 0.12594757974147797, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -89.23815155029297, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -65.2593994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07618484646081924, + "rewards_train/margins": 0.1521247923374176, + "rewards_train/rejected": -0.07593994587659836, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -20.736785888671875, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -14.175395965576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0013214111095294356, + "rewards_train/margins": 0.03136100841220468, + "rewards_train/rejected": -0.030039597302675247, + "step": 134 + }, + { + "epoch": 0.04, + "logps_train/chosen": -125.34526824951172, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -129.95947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06547317653894424, + "rewards_train/margins": 0.06142044207081199, + "rewards_train/rejected": 0.0040527344681322575, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -133.2755126953125, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -116.52728271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02755126915872097, + "rewards_train/margins": 0.02517700381577015, + "rewards_train/rejected": -0.05272827297449112, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.140024185180664, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -14.125923156738281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.020252419635653496, + "rewards_train/margins": -0.06391010619699955, + "rewards_train/rejected": 0.043657686561346054, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -94.88905334472656, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -124.91148376464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11109466850757599, + "rewards_train/margins": 0.1022430444136262, + "rewards_train/rejected": 0.008851624093949795, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -30.719039916992188, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -13.792542457580566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003096008440479636, + "rewards_train/margins": 0.032350254943594337, + "rewards_train/rejected": -0.0292542465031147, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -170.25509643554688, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -140.75579833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02550964429974556, + "rewards_train/margins": 0.05007019266486168, + "rewards_train/rejected": -0.07557983696460724, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -80.4466552734375, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -97.03086853027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0053344727493822575, + "rewards_train/margins": 0.008421325823292136, + "rewards_train/rejected": -0.0030868530739098787, + "step": 135 + }, + { + "epoch": 0.04, + "logps_train/chosen": -80.84040069580078, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -132.0558319091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06595993041992188, + "rewards_train/margins": 0.17154312133789062, + "rewards_train/rejected": -0.10558319091796875, + "step": 135 + }, + { + "epoch": 0.04, + "learning_rate": 1.999894142842077e-06, + "loss": 0.6717, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -88.72943115234375, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -75.31600952148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02705688588321209, + "rewards_train/margins": 0.00865783728659153, + "rewards_train/rejected": 0.01839904859662056, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -30.8187255859375, + "logps_train/ref_chosen": -30.375, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -23.03713035583496, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04437255859375, + "rewards_train/margins": -0.015659522265195847, + "rewards_train/rejected": -0.028713036328554153, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -61.182823181152344, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -52.2731819152832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.031717684119939804, + "rewards_train/margins": 0.034035875694826245, + "rewards_train/rejected": -0.0023181915748864412, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.35749626159668, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -24.843807220458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02675037458539009, + "rewards_train/margins": 0.06113109737634659, + "rewards_train/rejected": -0.0343807227909565, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.999629020690918, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -11.962891578674316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.031212901696562767, + "rewards_train/margins": -0.028673743829131126, + "rewards_train/rejected": -0.0025391578674316406, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -72.5350341796875, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -106.56761169433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.09649658203125, + "rewards_train/margins": -0.04674224555492401, + "rewards_train/rejected": 0.143238827586174, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -128.56861877441406, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -137.9193115234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10686188191175461, + "rewards_train/margins": -0.01493072509765625, + "rewards_train/rejected": -0.09193115681409836, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.1605141162872314, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -5.049224376678467, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0073860883712768555, + "rewards_train/margins": 0.018558526411652565, + "rewards_train/rejected": -0.01117243804037571, + "step": 136 + }, + { + "epoch": 0.04, + "logps_train/chosen": -103.052001953125, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -109.03120422363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0447998046875, + "rewards_train/margins": 0.09792022779583931, + "rewards_train/rejected": -0.05312042310833931, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -8.264812469482422, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -3.2198710441589355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017268752679228783, + "rewards_train/margins": 0.0158183571184054, + "rewards_train/rejected": 0.001450395560823381, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.36992073059082, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -5.393777370452881, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0005079269758425653, + "rewards_train/margins": 0.01801066513871774, + "rewards_train/rejected": -0.017502738162875175, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -76.64662170410156, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -62.98863220214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01466217078268528, + "rewards_train/margins": -0.06579895131289959, + "rewards_train/rejected": 0.05113678053021431, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.2838387489318848, + "logps_train/ref_chosen": -2.234375, + "logps_train/ref_rejected": -2.21875, + "logps_train/rejected": -2.3576016426086426, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004946374800056219, + "rewards_train/margins": 0.008938789833337069, + "rewards_train/rejected": -0.013885164633393288, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.351325035095215, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -9.87365436553955, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0007575034978799522, + "rewards_train/margins": -0.019642067316453904, + "rewards_train/rejected": 0.01888456381857395, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -139.36131286621094, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -122.17620086669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03613128885626793, + "rewards_train/margins": 0.031488802284002304, + "rewards_train/rejected": -0.06762009114027023, + "step": 137 + }, + { + "epoch": 0.04, + "logps_train/chosen": -73.29554748535156, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -164.94232177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07955475151538849, + "rewards_train/margins": 0.21467743813991547, + "rewards_train/rejected": -0.29423218965530396, + "step": 137 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998521509547213e-06, + "loss": 0.6838, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -148.9016876220703, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -123.87089538574219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19016876816749573, + "rewards_train/margins": -0.053079232573509216, + "rewards_train/rejected": -0.1370895355939865, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -9.934060096740723, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -7.707053184509277, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025343989953398705, + "rewards_train/margins": 0.04604930803179741, + "rewards_train/rejected": -0.020705318078398705, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -90.58653259277344, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -86.85786437988281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008653259836137295, + "rewards_train/margins": -0.0728668263182044, + "rewards_train/rejected": 0.06421356648206711, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -70.0762939453125, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -129.31060791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14237061142921448, + "rewards_train/margins": 0.3234314024448395, + "rewards_train/rejected": -0.181060791015625, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -71.64883422851562, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -62.73244857788086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03511657938361168, + "rewards_train/margins": 0.008361436426639557, + "rewards_train/rejected": 0.026755142956972122, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -200.38040161132812, + "logps_train/ref_chosen": -198.0, + "logps_train/ref_rejected": -252.0, + "logps_train/rejected": -254.26461791992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23804016411304474, + "rewards_train/margins": -0.011578366160392761, + "rewards_train/rejected": -0.22646179795265198, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.052916526794434, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -6.718966484069824, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0021666528191417456, + "rewards_train/margins": -0.027145005529746413, + "rewards_train/rejected": 0.024978352710604668, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.326164722442627, + "logps_train/ref_chosen": -1.3046875, + "logps_train/ref_rejected": -2.0625, + "logps_train/rejected": -1.968408226966858, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.002147722290828824, + "rewards_train/margins": -0.011556899407878518, + "rewards_train/rejected": 0.009409177117049694, + "step": 138 + }, + { + "epoch": 0.04, + "logps_train/chosen": -118.6363525390625, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -152.29017639160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013635254465043545, + "rewards_train/margins": -0.08461761381477118, + "rewards_train/rejected": 0.07098235934972763, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -22.42816734313965, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -3.578125, + "logps_train/rejected": -3.5124661922454834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019683266058564186, + "rewards_train/margins": 0.013117385096848011, + "rewards_train/rejected": 0.006565880961716175, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -60.91968536376953, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -79.20767974853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08303146809339523, + "rewards_train/margins": 0.10379944369196892, + "rewards_train/rejected": -0.020767975598573685, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -5.0147600173950195, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -10.716028213500977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010851002298295498, + "rewards_train/margins": 0.017001819796860218, + "rewards_train/rejected": -0.027852822095155716, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -102.38468170166016, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -118.45759582519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011531829833984375, + "rewards_train/margins": 0.0072914124466478825, + "rewards_train/rejected": 0.0042404173873364925, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -39.790916442871094, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -39.70296096801758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0790916457772255, + "rewards_train/margins": 0.04120445251464844, + "rewards_train/rejected": -0.12029609829187393, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -150.72305297851562, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -136.46180725097656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0276947021484375, + "rewards_train/margins": -0.02612457424402237, + "rewards_train/rejected": 0.05381927639245987, + "step": 139 + }, + { + "epoch": 0.04, + "logps_train/chosen": -86.65528106689453, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -78.25286865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015528107061982155, + "rewards_train/margins": 0.059758758172392845, + "rewards_train/rejected": -0.075286865234375, + "step": 139 + }, + { + "epoch": 0.04, + "learning_rate": 1.9998031611623926e-06, + "loss": 0.684, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.453953742980957, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -21.66245460510254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029604626819491386, + "rewards_train/margins": 0.03335008746944368, + "rewards_train/rejected": -0.0037454606499522924, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.137310028076172, + "logps_train/ref_chosen": -3.140625, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -14.25185775756836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0003314971982035786, + "rewards_train/margins": -0.018232727044960484, + "rewards_train/rejected": 0.018564224243164062, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -80.97848510742188, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -44.25039291381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05215149000287056, + "rewards_train/margins": 0.027190780267119408, + "rewards_train/rejected": 0.024960709735751152, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -112.2635498046875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -105.21136474609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.07364501804113388, + "rewards_train/margins": -0.005218505859375, + "rewards_train/rejected": 0.07886352390050888, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -69.09030151367188, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -86.11598205566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009030151180922985, + "rewards_train/margins": 0.0025680549442768097, + "rewards_train/rejected": -0.011598206125199795, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.8116960525512695, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -5.402837753295898, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004607105161994696, + "rewards_train/margins": -0.00807332992553711, + "rewards_train/rejected": 0.0034662247635424137, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -14.630058288574219, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -13.245177268981934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.050505828112363815, + "rewards_train/margins": -0.05723810149356723, + "rewards_train/rejected": 0.006732273381203413, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -78.9542236328125, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.82903289794922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00457763671875, + "rewards_train/margins": -0.012519074603915215, + "rewards_train/rejected": 0.017096711322665215, + "step": 140 + }, + { + "epoch": 0.04, + "logps_train/chosen": -58.29426574707031, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -139.2525634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12057342380285263, + "rewards_train/margins": 0.3458297774195671, + "rewards_train/rejected": -0.22525635361671448, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -6.288993835449219, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -11.77117919921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.000774383544921875, + "rewards_train/margins": -0.004906463902443647, + "rewards_train/rejected": 0.004132080357521772, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -179.79598999023438, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -200.0, + "logps_train/rejected": -199.92515563964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0204010009765625, + "rewards_train/margins": 0.01291656494140625, + "rewards_train/rejected": 0.00748443603515625, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.1706738471984863, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -9.684211730957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00480761518701911, + "rewards_train/margins": 0.029478787910193205, + "rewards_train/rejected": -0.024671172723174095, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -9.073498725891113, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -6.793903350830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011400127783417702, + "rewards_train/margins": 0.012665462912991643, + "rewards_train/rejected": -0.0012653351295739412, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -31.811344146728516, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -11.51712417602539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04386558756232262, + "rewards_train/margins": 0.08932800590991974, + "rewards_train/rejected": -0.04546241834759712, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.5497817993164062, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -3.307973861694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010646820068359375, + "rewards_train/margins": 0.03519420698285103, + "rewards_train/rejected": -0.024547386914491653, + "step": 141 + }, + { + "epoch": 0.04, + "logps_train/chosen": -139.09127807617188, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -140.4398193359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009127807803452015, + "rewards_train/margins": -0.06514587346464396, + "rewards_train/rejected": 0.05601806566119194, + "step": 141 + }, + { + "epoch": 0.04, + "learning_rate": 1.9997471738079682e-06, + "loss": 0.6811, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -75.01566314697266, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -13.949667930603027, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.051566313952207565, + "rewards_train/margins": -0.037849520333111286, + "rewards_train/rejected": -0.01371679361909628, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -131.41392517089844, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -174.25559997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05860748514533043, + "rewards_train/margins": 0.18416748568415642, + "rewards_train/rejected": -0.125560000538826, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.375460147857666, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -9.310218811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02035851590335369, + "rewards_train/margins": 0.035663364455103874, + "rewards_train/rejected": -0.056021880358457565, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -66.69267272949219, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -107.28450012207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01926727406680584, + "rewards_train/margins": 0.20918274112045765, + "rewards_train/rejected": -0.2284500151872635, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -89.31080627441406, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -134.77377319335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01891937293112278, + "rewards_train/margins": -0.10370331071317196, + "rewards_train/rejected": 0.12262268364429474, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -29.30408477783203, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -14.857131004333496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00540847796946764, + "rewards_train/margins": 0.02405462320894003, + "rewards_train/rejected": -0.02946310117840767, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -81.25381469726562, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -83.42376708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07538147270679474, + "rewards_train/margins": 0.01699523627758026, + "rewards_train/rejected": -0.092376708984375, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -134.86996459960938, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -80.47456359863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013003540225327015, + "rewards_train/margins": 0.11045989859849215, + "rewards_train/rejected": -0.09745635837316513, + "step": 142 + }, + { + "epoch": 0.04, + "logps_train/chosen": -75.91072082519531, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -75.88721466064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.15892791748046875, + "rewards_train/margins": -0.0023506134748458862, + "rewards_train/rejected": 0.16127853095531464, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -57.3022346496582, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -99.23761749267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04477653652429581, + "rewards_train/margins": 0.11853829026222229, + "rewards_train/rejected": -0.07376175373792648, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -173.87037658691406, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -163.64361572265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.11296234279870987, + "rewards_train/margins": -0.02267608791589737, + "rewards_train/rejected": 0.13563843071460724, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -116.87570190429688, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -141.98748779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06242981180548668, + "rewards_train/margins": 0.1611785925924778, + "rewards_train/rejected": -0.09874878078699112, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.714533805847168, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -12.731576919555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.022296620532870293, + "rewards_train/margins": 0.0017043128609657288, + "rewards_train/rejected": 0.020592307671904564, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.3464804887771606, + "logps_train/ref_chosen": -1.34375, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -20.506900787353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00027304887771606445, + "rewards_train/margins": 0.025417029857635498, + "rewards_train/rejected": -0.025690078735351562, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -8.814173698425293, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -6.769253730773926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006082630250602961, + "rewards_train/margins": 0.020508003886789083, + "rewards_train/rejected": -0.014425373636186123, + "step": 143 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.076543807983398, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -5.560990333557129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0014043807750567794, + "rewards_train/margins": 0.02031965332571417, + "rewards_train/rejected": -0.02172403410077095, + "step": 143 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996841892832997e-06, + "loss": 0.6701, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -19.750112533569336, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -6.350468635559082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024988746270537376, + "rewards_train/margins": 0.03503561019897461, + "rewards_train/rejected": -0.010046863928437233, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -131.38656616210938, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -125.2939453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06134338304400444, + "rewards_train/margins": -0.05926208570599556, + "rewards_train/rejected": 0.12060546875, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -127.18144226074219, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -130.13040161132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01814422570168972, + "rewards_train/margins": -0.005104064010083675, + "rewards_train/rejected": -0.013040161691606045, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -110.42517852783203, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -103.88169860839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.057482149451971054, + "rewards_train/margins": -0.004347991198301315, + "rewards_train/rejected": 0.06183014065027237, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -110.95462036132812, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -135.33450317382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09546203911304474, + "rewards_train/margins": 0.13798828423023224, + "rewards_train/rejected": -0.23345032334327698, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -89.61656188964844, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -108.20217895507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03834381327033043, + "rewards_train/margins": 0.05856170877814293, + "rewards_train/rejected": -0.0202178955078125, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -68.79691314697266, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -69.55821228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029691314324736595, + "rewards_train/margins": 0.026129914447665215, + "rewards_train/rejected": -0.05582122877240181, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.649929523468018, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -18.418642044067383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0068820477463305, + "rewards_train/margins": 0.04874625289812684, + "rewards_train/rejected": -0.04186420515179634, + "step": 144 + }, + { + "epoch": 0.04, + "logps_train/chosen": -122.39128875732422, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -132.90838623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.039128877222537994, + "rewards_train/margins": 0.051709748804569244, + "rewards_train/rejected": -0.09083862602710724, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -79.33732604980469, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -120.99993133544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01626739464700222, + "rewards_train/margins": -0.033739471808075905, + "rewards_train/rejected": 0.050006866455078125, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.677423000335693, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -62.251487731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038507699966430664, + "rewards_train/margins": 0.01365647278726101, + "rewards_train/rejected": 0.024851227179169655, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -83.3508529663086, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -119.53614807128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08508529514074326, + "rewards_train/margins": -0.031470488756895065, + "rewards_train/rejected": -0.05361480638384819, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.6172258853912354, + "logps_train/ref_chosen": -1.640625, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -2.7401413917541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002339911414310336, + "rewards_train/margins": 0.0013540505897253752, + "rewards_train/rejected": 0.000985860824584961, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -128.48385620117188, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -126.55905151367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05161438137292862, + "rewards_train/margins": 0.10751953348517418, + "rewards_train/rejected": -0.05590515211224556, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.22829532623291, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -6.865170478820801, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0025170326698571444, + "rewards_train/margins": -0.012874985346570611, + "rewards_train/rejected": 0.010357952676713467, + "step": 145 + }, + { + "epoch": 0.04, + "logps_train/chosen": -30.525697708129883, + "logps_train/ref_chosen": -30.625, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -100.09524536132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009930229745805264, + "rewards_train/margins": 0.06945476587861776, + "rewards_train/rejected": -0.0595245361328125, + "step": 145 + }, + { + "epoch": 0.04, + "learning_rate": 1.9996142080292126e-06, + "loss": 0.6814, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -73.91475677490234, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -82.48741149902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008524322882294655, + "rewards_train/margins": 0.10726547427475452, + "rewards_train/rejected": -0.09874115139245987, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -133.85696411132812, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -133.919189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08569641411304474, + "rewards_train/margins": 0.006222531199455261, + "rewards_train/rejected": -0.0919189453125, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -132.15310668945312, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -116.78663635253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08468933403491974, + "rewards_train/margins": 0.11335296928882599, + "rewards_train/rejected": -0.02866363525390625, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.4781124591827393, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -5.358303546905518, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017813755199313164, + "rewards_train/margins": 0.02239410998299718, + "rewards_train/rejected": -0.004580354783684015, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -8.387735366821289, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -49.25093078613281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.020023537799715996, + "rewards_train/margins": -0.06993045844137669, + "rewards_train/rejected": 0.04990692064166069, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -74.27226257324219, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.4310531616211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02722625806927681, + "rewards_train/margins": -0.03412094200029969, + "rewards_train/rejected": 0.0068946839310228825, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.6516520977020264, + "logps_train/ref_chosen": -0.734375, + "logps_train/ref_rejected": -1.609375, + "logps_train/rejected": -1.7048016786575317, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008272290229797363, + "rewards_train/margins": 0.017814958468079567, + "rewards_train/rejected": -0.009542668238282204, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.6957767009735107, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -3.3041396141052246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0008276700973510742, + "rewards_train/margins": 0.013961291871964931, + "rewards_train/rejected": -0.014788961969316006, + "step": 146 + }, + { + "epoch": 0.04, + "logps_train/chosen": -20.48290252685547, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -3.244540214538574, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06420975178480148, + "rewards_train/margins": 0.08553877286612988, + "rewards_train/rejected": -0.021329021081328392, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.8467235565185547, + "logps_train/ref_chosen": -0.8359375, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -3.130188465118408, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0010786056518554688, + "rewards_train/margins": -0.0005597591516561806, + "rewards_train/rejected": -0.0005188465001992881, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -80.53718566894531, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -105.28858947753906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0037185668479651213, + "rewards_train/margins": -0.02485961909405887, + "rewards_train/rejected": 0.02114105224609375, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -17.557323455810547, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -9.934359550476074, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006767654325813055, + "rewards_train/margins": -0.031046392861753702, + "rewards_train/rejected": 0.03781404718756676, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.36872702836990356, + "logps_train/ref_chosen": -0.34765625, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -14.886468887329102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0021070779766887426, + "rewards_train/margins": 0.04278981150127947, + "rewards_train/rejected": -0.044896889477968216, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.7593650817871094, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -8.813323974609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008749008178710938, + "rewards_train/margins": 0.010083390399813652, + "rewards_train/rejected": -0.01883239857852459, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -14.197920799255371, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -22.58531951904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01770791970193386, + "rewards_train/margins": 0.026239871978759766, + "rewards_train/rejected": -0.008531952276825905, + "step": 147 + }, + { + "epoch": 0.04, + "logps_train/chosen": -83.2872543334961, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -87.64334106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021274566650390625, + "rewards_train/margins": 0.08560867607593536, + "rewards_train/rejected": -0.06433410942554474, + "step": 147 + }, + { + "epoch": 0.04, + "learning_rate": 1.999537230535501e-06, + "loss": 0.6815, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -55.887901306152344, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -35.73489761352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013790130615234375, + "rewards_train/margins": 0.08469963073730469, + "rewards_train/rejected": -0.09848976135253906, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -115.75646209716797, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -95.63040161132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2743538022041321, + "rewards_train/margins": 0.2873939638957381, + "rewards_train/rejected": -0.013040161691606045, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -74.95686340332031, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -125.50920867919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1456863433122635, + "rewards_train/margins": 0.10523451864719391, + "rewards_train/rejected": -0.2509208619594574, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.9883766174316406, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -15.743803024291992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03477516397833824, + "rewards_train/margins": -0.05414486117660999, + "rewards_train/rejected": 0.01936969719827175, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -132.8726806640625, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -128.34571838378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11273193359375, + "rewards_train/margins": 0.1473037712275982, + "rewards_train/rejected": -0.03457183763384819, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -119.30827331542969, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -65.88101196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11917266994714737, + "rewards_train/margins": 0.10727386642247438, + "rewards_train/rejected": 0.011898803524672985, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -6.2973480224609375, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -77.00263977050781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004734802525490522, + "rewards_train/margins": -0.004470825457246974, + "rewards_train/rejected": -0.0002639770682435483, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.772870063781738, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -5.145659446716309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0022870064713060856, + "rewards_train/margins": 0.027903938200324774, + "rewards_train/rejected": -0.03019094467163086, + "step": 148 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.2944386005401611, + "logps_train/ref_chosen": -1.40625, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -2.828883171081543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011181140318512917, + "rewards_train/margins": 0.03625695779919624, + "rewards_train/rejected": -0.025075817480683327, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -100.08624267578125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -110.36578369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09137573093175888, + "rewards_train/margins": 0.22795409709215164, + "rewards_train/rejected": -0.13657836616039276, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -116.51565551757812, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -144.26043701171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1515655517578125, + "rewards_train/margins": -0.025521844625473022, + "rewards_train/rejected": -0.12604370713233948, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -81.89786529541016, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -111.80537414550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.060213472694158554, + "rewards_train/margins": 0.2907508872449398, + "rewards_train/rejected": -0.23053741455078125, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -23.67818832397461, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -10.804850578308105, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0071811676025390625, + "rewards_train/margins": 0.0251662265509367, + "rewards_train/rejected": -0.017985058948397636, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -129.2283935546875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -132.0350341796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02283935621380806, + "rewards_train/margins": -0.019335938151925802, + "rewards_train/rejected": -0.0035034180618822575, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.698978066444397, + "logps_train/ref_chosen": -0.71484375, + "logps_train/ref_rejected": -0.71484375, + "logps_train/rejected": -0.688542902469635, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0015865684254094958, + "rewards_train/margins": -0.0010435163276270032, + "rewards_train/rejected": 0.002630084753036499, + "step": 149 + }, + { + "epoch": 0.04, + "logps_train/chosen": -52.61505126953125, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -13.516868591308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011505126953125, + "rewards_train/margins": -0.041068268939852715, + "rewards_train/rejected": 0.029563141986727715, + "step": 149 + }, + { + "epoch": 0.04, + "learning_rate": 1.999453257340926e-06, + "loss": 0.6586, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -13.417628288269043, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -6.3242645263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0019871711265295744, + "rewards_train/margins": 3.86236933991313e-05, + "rewards_train/rejected": 0.001948547433130443, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -42.596412658691406, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -4.975517272949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00964126642793417, + "rewards_train/margins": 0.025410463102161884, + "rewards_train/rejected": -0.035051729530096054, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -54.1495475769043, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -53.71722412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0850452408194542, + "rewards_train/margins": 0.08176765288226306, + "rewards_train/rejected": 0.0032775879371911287, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.47898414731025696, + "logps_train/ref_chosen": -0.4765625, + "logps_train/ref_rejected": -0.4765625, + "logps_train/rejected": -0.4737936556339264, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0002421647368464619, + "rewards_train/margins": -0.0005190491792745888, + "rewards_train/rejected": 0.00027688444242812693, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -161.9637451171875, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -126.44205474853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0036254883743822575, + "rewards_train/margins": 0.14783096918836236, + "rewards_train/rejected": -0.1442054808139801, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.004579544067383, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -28.971221923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012957954779267311, + "rewards_train/margins": 0.04666423983871937, + "rewards_train/rejected": -0.05962219461798668, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -50.70109558105469, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -48.2314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02010955847799778, + "rewards_train/margins": 0.1030349712818861, + "rewards_train/rejected": -0.12314452975988388, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -135.14276123046875, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -96.68485260009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.085723876953125, + "rewards_train/margins": 0.054209135472774506, + "rewards_train/rejected": 0.031514741480350494, + "step": 150 + }, + { + "epoch": 0.04, + "logps_train/chosen": -58.6052131652832, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -74.95391845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06447868794202805, + "rewards_train/margins": 0.009870532900094986, + "rewards_train/rejected": 0.05460815504193306, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -9.226142883300781, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -96.44518280029297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01011428888887167, + "rewards_train/margins": -0.015596008859574795, + "rewards_train/rejected": 0.005481719970703125, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -78.61422729492188, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -115.96875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13857726752758026, + "rewards_train/margins": 0.33545227348804474, + "rewards_train/rejected": -0.19687500596046448, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -8.452159881591797, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -1.265625, + "logps_train/rejected": -1.3192479610443115, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03603401407599449, + "rewards_train/margins": 0.041396310087293386, + "rewards_train/rejected": -0.005362296011298895, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.003275394439697, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -8.508825302124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017515039071440697, + "rewards_train/margins": 0.008367491886019707, + "rewards_train/rejected": -0.025882530957460403, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -99.27629089355469, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -104.27510833740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02762909047305584, + "rewards_train/margins": 0.049881743267178535, + "rewards_train/rejected": -0.07751083374023438, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -108.11944580078125, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -165.4766082763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011944579891860485, + "rewards_train/margins": 0.13571625668555498, + "rewards_train/rejected": -0.14766083657741547, + "step": 151 + }, + { + "epoch": 0.04, + "logps_train/chosen": -33.914730072021484, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -18.984115600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05852699279785156, + "rewards_train/margins": 0.05693855287972838, + "rewards_train/rejected": 0.0015884399181231856, + "step": 151 + }, + { + "epoch": 0.04, + "learning_rate": 1.9993622890332106e-06, + "loss": 0.6608, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.026729583740234, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -4.441925048828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015172958374023438, + "rewards_train/margins": -0.027230453677475452, + "rewards_train/rejected": 0.012057495303452015, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -69.89991760253906, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.22805786132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06000823900103569, + "rewards_train/margins": -0.01718597486615181, + "rewards_train/rejected": 0.0771942138671875, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -89.3545913696289, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -62.18046569824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06454086303710938, + "rewards_train/margins": 0.10758743435144424, + "rewards_train/rejected": -0.04304657131433487, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -25.05682945251465, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -0.578125, + "logps_train/rejected": -1.891160488128662, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15681706368923187, + "rewards_train/margins": 0.2881206125020981, + "rewards_train/rejected": -0.1313035488128662, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -39.332035064697266, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -29.23888397216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06679649651050568, + "rewards_train/margins": 0.04068489372730255, + "rewards_train/rejected": 0.026111602783203125, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -14.58094310760498, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -5.3228607177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0018443107837811112, + "rewards_train/margins": 0.0023167611798271537, + "rewards_train/rejected": -0.004161071963608265, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -101.29393005371094, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -77.94068908691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07060699909925461, + "rewards_train/margins": 0.16467590630054474, + "rewards_train/rejected": -0.09406890720129013, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -134.3170166015625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -124.01383972167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06829833984375, + "rewards_train/margins": 0.11968231201171875, + "rewards_train/rejected": -0.05138397216796875, + "step": 152 + }, + { + "epoch": 0.04, + "logps_train/chosen": -91.7398452758789, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -157.66293334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.023984527215361595, + "rewards_train/margins": 0.14230881072580814, + "rewards_train/rejected": -0.16629333794116974, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -129.16806030273438, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -124.76983642578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01680603064596653, + "rewards_train/margins": -0.03982238844037056, + "rewards_train/rejected": 0.02301635779440403, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -1.1468406915664673, + "logps_train/ref_chosen": -1.0546875, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -5.598879814147949, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009215319529175758, + "rewards_train/margins": -0.008702338091097772, + "rewards_train/rejected": -0.0005129814380779862, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -100.04403686523438, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -109.09625244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19559632241725922, + "rewards_train/margins": 0.35522156953811646, + "rewards_train/rejected": -0.15962524712085724, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.38349723815918, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -5.27716064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007099723909050226, + "rewards_train/margins": 0.014366341289132833, + "rewards_train/rejected": -0.02146606519818306, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -22.308164596557617, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -1.4296875, + "logps_train/rejected": -3.4998950958251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10668354481458664, + "rewards_train/margins": 0.31370430439710617, + "rewards_train/rejected": -0.20702075958251953, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.24872556328773499, + "logps_train/ref_chosen": -0.27734375, + "logps_train/ref_rejected": -0.27734375, + "logps_train/rejected": -0.26032575964927673, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002861818764358759, + "rewards_train/margins": 0.0011600196594372392, + "rewards_train/rejected": 0.0017017991049215198, + "step": 153 + }, + { + "epoch": 0.04, + "logps_train/chosen": -6.330545902252197, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -1.8203125, + "logps_train/rejected": -1.9538767337799072, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004929590504616499, + "rewards_train/margins": 0.008426833432167768, + "rewards_train/rejected": -0.013356423936784267, + "step": 153 + }, + { + "epoch": 0.04, + "learning_rate": 1.9992643262490366e-06, + "loss": 0.6504, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.115743637084961, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -15.84749984741211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.024074364453554153, + "rewards_train/margins": -0.008074378594756126, + "rewards_train/rejected": -0.015999985858798027, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -91.65507507324219, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -113.84601593017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11550750583410263, + "rewards_train/margins": 0.019094087183475494, + "rewards_train/rejected": -0.13460159301757812, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -40.99017333984375, + "logps_train/ref_chosen": -40.25, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -76.63900756835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07401733845472336, + "rewards_train/margins": -0.0101165771484375, + "rewards_train/rejected": -0.06390076130628586, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -111.25700378417969, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -143.29248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07429962605237961, + "rewards_train/margins": 0.10354767367243767, + "rewards_train/rejected": -0.02924804762005806, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -70.56731414794922, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -102.57850646972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14326858520507812, + "rewards_train/margins": 0.2511192336678505, + "rewards_train/rejected": -0.10785064846277237, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -6.422679901123047, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -90.76658630371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.029767990112304688, + "rewards_train/margins": -0.05310936085879803, + "rewards_train/rejected": 0.02334137074649334, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -12.348316192626953, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -25.52442741394043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015168380923569202, + "rewards_train/margins": 0.005111122503876686, + "rewards_train/rejected": 0.010057258419692516, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -71.54894256591797, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -65.65110778808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.045105744153261185, + "rewards_train/margins": 0.06021652277559042, + "rewards_train/rejected": -0.015110778622329235, + "step": 154 + }, + { + "epoch": 0.04, + "logps_train/chosen": -105.29869079589844, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -90.31868743896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17986908555030823, + "rewards_train/margins": 0.1019996702671051, + "rewards_train/rejected": -0.28186875581741333, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -121.82681274414062, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -133.70901489257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0173187255859375, + "rewards_train/margins": 0.08822021633386612, + "rewards_train/rejected": -0.07090149074792862, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -5.870887279510498, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -6.580146312713623, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006661272142082453, + "rewards_train/margins": 0.014675903599709272, + "rewards_train/rejected": -0.00801463145762682, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -9.281428337097168, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -1.140625, + "logps_train/rejected": -1.1582508087158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5843572020530701, + "rewards_train/margins": 0.5861197829945013, + "rewards_train/rejected": -0.0017625809414312243, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -19.905000686645508, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -15.836978912353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03449993208050728, + "rewards_train/margins": 0.0681978240609169, + "rewards_train/rejected": -0.03369789198040962, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -11.20639705657959, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -2.9722611904144287, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023110294714570045, + "rewards_train/margins": 0.04221141338348389, + "rewards_train/rejected": -0.01910111866891384, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -125.07088470458984, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -98.41473388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.042911529541015625, + "rewards_train/margins": 0.13438492268323898, + "rewards_train/rejected": -0.09147339314222336, + "step": 155 + }, + { + "epoch": 0.04, + "logps_train/chosen": -3.2179925441741943, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -8.804006576538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004611754324287176, + "rewards_train/margins": 0.03828890481963754, + "rewards_train/rejected": -0.04290065914392471, + "step": 155 + }, + { + "epoch": 0.04, + "learning_rate": 1.99915936967404e-06, + "loss": 0.6521, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -89.79344177246094, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -89.76332092285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02934417687356472, + "rewards_train/margins": -0.10301208309829235, + "rewards_train/rejected": 0.07366790622472763, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.608402252197266, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -6.147995471954346, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013965225778520107, + "rewards_train/margins": -0.017915678676217794, + "rewards_train/rejected": 0.003950452897697687, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.3460214138031006, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -1.9453125, + "logps_train/rejected": -1.9388855695724487, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009147859178483486, + "rewards_train/margins": 0.008505166100803763, + "rewards_train/rejected": 0.0006426930776797235, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -48.05434036254883, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -49.50346755981445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05543403699994087, + "rewards_train/margins": -0.03008728101849556, + "rewards_train/rejected": -0.025346755981445312, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -150.03228759765625, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -155.41651916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.096771240234375, + "rewards_train/margins": 0.1384231559932232, + "rewards_train/rejected": -0.04165191575884819, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -121.3148193359375, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -109.16165161132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18148194253444672, + "rewards_train/margins": -0.11531677842140198, + "rewards_train/rejected": -0.06616516411304474, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.979432582855225, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -41.3603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008306741714477539, + "rewards_train/margins": 0.04434189945459366, + "rewards_train/rejected": -0.03603515774011612, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -14.870335578918457, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -4.106495380401611, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012966441921889782, + "rewards_train/margins": 0.020490980241447687, + "rewards_train/rejected": -0.007524538319557905, + "step": 156 + }, + { + "epoch": 0.04, + "logps_train/chosen": -2.496687173843384, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -21.679046630859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.007481217384338379, + "rewards_train/margins": -0.014576554298400879, + "rewards_train/rejected": 0.0070953369140625, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -83.439453125, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -48.430519104003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05605468899011612, + "rewards_train/margins": 0.024106599390506744, + "rewards_train/rejected": 0.031948089599609375, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -8.810959815979004, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -9.112258911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03765401989221573, + "rewards_train/margins": 0.0738799124956131, + "rewards_train/rejected": -0.03622589260339737, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -126.31925201416016, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -107.40458679199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11807479709386826, + "rewards_train/margins": 0.158533476293087, + "rewards_train/rejected": -0.04045867919921875, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -84.16350555419922, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -55.453651428222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016350556164979935, + "rewards_train/margins": 0.00401458702981472, + "rewards_train/rejected": -0.020365143194794655, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -161.9503173828125, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -158.74496459960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29503175616264343, + "rewards_train/margins": -0.020535290241241455, + "rewards_train/rejected": -0.274496465921402, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -120.08181762695312, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -90.37045288085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10818176716566086, + "rewards_train/margins": 0.1788635328412056, + "rewards_train/rejected": -0.28704530000686646, + "step": 157 + }, + { + "epoch": 0.04, + "logps_train/chosen": -18.354965209960938, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -31.791702270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02299652062356472, + "rewards_train/margins": 0.04367371089756489, + "rewards_train/rejected": -0.06667023152112961, + "step": 157 + }, + { + "epoch": 0.04, + "learning_rate": 1.9990474200428056e-06, + "loss": 0.682, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -64.61468505859375, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -101.82325744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03853149339556694, + "rewards_train/margins": 0.02085723727941513, + "rewards_train/rejected": 0.01767425611615181, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -48.89595413208008, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -42.08146667480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06459541618824005, + "rewards_train/margins": -0.05644874833524227, + "rewards_train/rejected": -0.00814666785299778, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -165.6009521484375, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -99.250732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03990478441119194, + "rewards_train/margins": 0.1649780236184597, + "rewards_train/rejected": -0.12507323920726776, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -18.01030731201172, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -12.547283172607422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.038530733436346054, + "rewards_train/margins": -0.021302415058016777, + "rewards_train/rejected": -0.017228318378329277, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -16.854412078857422, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -14.133705139160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07294120639562607, + "rewards_train/margins": -0.047070691362023354, + "rewards_train/rejected": -0.025870515033602715, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -84.36124420166016, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -134.73480224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06387557834386826, + "rewards_train/margins": 0.13735580444335938, + "rewards_train/rejected": -0.07348022609949112, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -10.955809593200684, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -19.00918197631836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02316904067993164, + "rewards_train/margins": -0.03841276094317436, + "rewards_train/rejected": 0.061581801623106, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -73.76856231689453, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -111.0142822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07314377278089523, + "rewards_train/margins": 0.22457199543714523, + "rewards_train/rejected": -0.15142822265625, + "step": 158 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.598772048950195, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -9.465408325195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009877204895019531, + "rewards_train/margins": 0.011663628742098808, + "rewards_train/rejected": -0.02154083363711834, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -64.08639526367188, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -46.23845291137695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09136047214269638, + "rewards_train/margins": 0.09020576323382556, + "rewards_train/rejected": 0.0011547089088708162, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -14.975738525390625, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -4.973349571228027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0024261474609375, + "rewards_train/margins": 0.034136105328798294, + "rewards_train/rejected": -0.031709957867860794, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -16.270118713378906, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -3.050288438796997, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0020118714310228825, + "rewards_train/margins": 0.020204472821205854, + "rewards_train/rejected": -0.022216344252228737, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -177.1927490234375, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -148.6263427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21927490830421448, + "rewards_train/margins": 0.14335936307907104, + "rewards_train/rejected": -0.3626342713832855, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -144.69839477539062, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -138.03817749023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03016052208840847, + "rewards_train/margins": -0.06602172739803791, + "rewards_train/rejected": 0.09618224948644638, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -4.426096439361572, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -11.846726417541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010515356436371803, + "rewards_train/margins": 0.04518800042569637, + "rewards_train/rejected": -0.03467264398932457, + "step": 159 + }, + { + "epoch": 0.04, + "logps_train/chosen": -99.1512222290039, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -116.09629821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06512222439050674, + "rewards_train/margins": 0.19450759142637253, + "rewards_train/rejected": -0.2596298158168793, + "step": 159 + }, + { + "epoch": 0.04, + "learning_rate": 1.9989284781388615e-06, + "loss": 0.6676, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -91.50881958007812, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -149.73736572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19911804795265198, + "rewards_train/margins": 0.372854620218277, + "rewards_train/rejected": -0.173736572265625, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -5.614221096038818, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -5.22338342666626, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.045797109603881836, + "rewards_train/margins": -0.039083766750991344, + "rewards_train/rejected": -0.0067133428528904915, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -139.30819702148438, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -183.44667053222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13081970810890198, + "rewards_train/margins": 0.01384735107421875, + "rewards_train/rejected": -0.14466705918312073, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -5.250831604003906, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -9.148497581481934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015708161517977715, + "rewards_train/margins": -0.0008584028109908104, + "rewards_train/rejected": -0.014849758706986904, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.491453170776367, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -5.865373611450195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014770316891372204, + "rewards_train/margins": -0.021982955746352673, + "rewards_train/rejected": 0.007212638854980469, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -5.532009601593018, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -10.645709991455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009299039840698242, + "rewards_train/margins": 0.036370038986206055, + "rewards_train/rejected": -0.027070999145507812, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -0.9423859119415283, + "logps_train/ref_chosen": -0.8046875, + "logps_train/ref_rejected": -0.92578125, + "logps_train/rejected": -1.6910715103149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013769841752946377, + "rewards_train/margins": 0.06275918427854776, + "rewards_train/rejected": -0.07652902603149414, + "step": 160 + }, + { + "epoch": 0.04, + "logps_train/chosen": -7.933134078979492, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -9.536828994750977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005813408177345991, + "rewards_train/margins": 0.016619490925222635, + "rewards_train/rejected": -0.022432899102568626, + "step": 160 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.3896069526672363, + "logps_train/ref_chosen": -2.359375, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -5.489055633544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0030231953132897615, + "rewards_train/margins": 0.014632369158789515, + "rewards_train/rejected": -0.017655564472079277, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.057849407196045, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -5.891355991363525, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0004650592745747417, + "rewards_train/margins": -0.004149341868469492, + "rewards_train/rejected": 0.004614401143044233, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -79.28306579589844, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -53.510948181152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12169342488050461, + "rewards_train/margins": 0.147788243368268, + "rewards_train/rejected": -0.026094818487763405, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -112.63221740722656, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -112.25926971435547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06322174519300461, + "rewards_train/margins": -0.0372947733849287, + "rewards_train/rejected": -0.025926971808075905, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.165810585021973, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -12.809795379638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008418941870331764, + "rewards_train/margins": 0.07064848206937313, + "rewards_train/rejected": -0.06222954019904137, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -1.9875653982162476, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -3.471386194229126, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0120377903804183, + "rewards_train/margins": -0.01021167088765651, + "rewards_train/rejected": -0.0018261194927617908, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.6717934608459473, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -9.925559997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014070654287934303, + "rewards_train/margins": 0.012876654043793678, + "rewards_train/rejected": 0.001194000244140625, + "step": 161 + }, + { + "epoch": 0.05, + "logps_train/chosen": -52.80585479736328, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -10.75943374633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.044414520263671875, + "rewards_train/margins": 0.07660789415240288, + "rewards_train/rejected": -0.032193373888731, + "step": 161 + }, + { + "epoch": 0.05, + "learning_rate": 1.998802544794675e-06, + "loss": 0.6725, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -10.6996488571167, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -31.5522403717041, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.017535114660859108, + "rewards_train/margins": -0.014740848913788795, + "rewards_train/rejected": 0.0322759635746479, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -338.4677734375, + "logps_train/ref_chosen": -334.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -182.76177978515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.44677734375, + "rewards_train/margins": -0.3705993667244911, + "rewards_train/rejected": -0.07617797702550888, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -10.030801773071289, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -1.75, + "logps_train/rejected": -1.7741667032241821, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009419823065400124, + "rewards_train/margins": 0.011836493387818336, + "rewards_train/rejected": -0.002416670322418213, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -132.88970947265625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -113.24290466308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18897095322608948, + "rewards_train/margins": 0.08531951904296875, + "rewards_train/rejected": -0.2742904722690582, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -50.84965515136719, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -56.583377838134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04003448411822319, + "rewards_train/margins": 0.023372268304228783, + "rewards_train/rejected": 0.016662215813994408, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.167806148529053, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -18.804847717285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010530615225434303, + "rewards_train/margins": -0.030045844614505768, + "rewards_train/rejected": 0.019515229389071465, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -106.96073913574219, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -126.48311614990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05392608791589737, + "rewards_train/margins": 0.20223770290613174, + "rewards_train/rejected": -0.14831161499023438, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -118.816162109375, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -25.15163803100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06838379055261612, + "rewards_train/margins": 0.09604759328067303, + "rewards_train/rejected": -0.027663802728056908, + "step": 162 + }, + { + "epoch": 0.05, + "logps_train/chosen": -86.19810485839844, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.53959655761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01981048658490181, + "rewards_train/margins": 0.034149169921875, + "rewards_train/rejected": -0.05395965650677681, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -56.15179443359375, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -88.14331817626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05982055887579918, + "rewards_train/margins": 0.0741523765027523, + "rewards_train/rejected": -0.014331817626953125, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -56.07586669921875, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -111.772216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.067413330078125, + "rewards_train/margins": 0.094635009765625, + "rewards_train/rejected": -0.0272216796875, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.933375358581543, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -10.82542896270752, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007400035858154297, + "rewards_train/margins": 0.06264286488294601, + "rewards_train/rejected": -0.07004290074110031, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -55.50367736816406, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -70.7330551147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02463226392865181, + "rewards_train/margins": 0.09793777391314507, + "rewards_train/rejected": -0.07330550998449326, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -67.88140869140625, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -11.916844367980957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.061859130859375, + "rewards_train/margins": 0.07229356747120619, + "rewards_train/rejected": -0.010434436611831188, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.867582321166992, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -19.445829391479492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005508232396095991, + "rewards_train/margins": -0.04842529399320483, + "rewards_train/rejected": 0.04291706159710884, + "step": 163 + }, + { + "epoch": 0.05, + "logps_train/chosen": -87.32695007324219, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -48.325496673583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16730499267578125, + "rewards_train/margins": 0.2498546615242958, + "rewards_train/rejected": -0.08254966884851456, + "step": 163 + }, + { + "epoch": 0.05, + "learning_rate": 1.9986696208916465e-06, + "loss": 0.6756, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -0.3742799460887909, + "logps_train/ref_chosen": -0.3828125, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -7.078000545501709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0008532553911209106, + "rewards_train/margins": 0.01802830956876278, + "rewards_train/rejected": -0.01717505417764187, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -245.52816772460938, + "logps_train/ref_chosen": -243.0, + "logps_train/ref_rejected": -218.0, + "logps_train/rejected": -221.21249389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.252816766500473, + "rewards_train/margins": 0.06843262910842896, + "rewards_train/rejected": -0.321249395608902, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -28.857393264770508, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -29.582149505615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02323932759463787, + "rewards_train/margins": 0.022475624457001686, + "rewards_train/rejected": -0.04571495205163956, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -133.46417236328125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -212.66796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14641724526882172, + "rewards_train/margins": 0.4203796535730362, + "rewards_train/rejected": -0.5667968988418579, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -61.37767791748047, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -88.83383178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1372322142124176, + "rewards_train/margins": 0.17061539366841316, + "rewards_train/rejected": -0.03338317945599556, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -99.9583740234375, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -108.89054107666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04583740234375, + "rewards_train/margins": 0.09321670234203339, + "rewards_train/rejected": -0.1390541046857834, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.5323212146759033, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -30.9359130859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0061428784392774105, + "rewards_train/margins": -0.0002658129669725895, + "rewards_train/rejected": 0.00640869140625, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -122.44776916503906, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -153.774169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09477692097425461, + "rewards_train/margins": 0.08264007419347763, + "rewards_train/rejected": -0.17741699516773224, + "step": 164 + }, + { + "epoch": 0.05, + "logps_train/chosen": -0.4202796220779419, + "logps_train/ref_chosen": -0.431640625, + "logps_train/ref_rejected": -0.431640625, + "logps_train/rejected": -0.43937650322914124, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0011361002689227462, + "rewards_train/margins": 0.001909688115119934, + "rewards_train/rejected": -0.0007735878461971879, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.13623571395874, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -11.256814956665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026123572140932083, + "rewards_train/margins": 0.018307924270629883, + "rewards_train/rejected": -0.044431496411561966, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -93.11184692382812, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.69863891601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0111846923828125, + "rewards_train/margins": 0.05867920070886612, + "rewards_train/rejected": -0.06986389309167862, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -4.018438339233398, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -5.928955554962158, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016906166449189186, + "rewards_train/margins": 0.03480172157287598, + "rewards_train/rejected": -0.01789555512368679, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.0895256996154785, + "logps_train/ref_chosen": -1.8515625, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -4.741828918457031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02379631996154785, + "rewards_train/margins": -0.027738428208976984, + "rewards_train/rejected": 0.0039421082474291325, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -11.911273956298828, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -31.203062057495117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04737739637494087, + "rewards_train/margins": -0.03957119071856141, + "rewards_train/rejected": -0.007806205656379461, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -11.01933479309082, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -7.1325459480285645, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008183479309082031, + "rewards_train/margins": 0.011321116238832474, + "rewards_train/rejected": -0.019504595547914505, + "step": 165 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.659015655517578, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -16.079158782958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015348434448242188, + "rewards_train/margins": 0.060764312744140625, + "rewards_train/rejected": -0.04541587829589844, + "step": 165 + }, + { + "epoch": 0.05, + "learning_rate": 1.9985297073601024e-06, + "loss": 0.6641, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.9569709300994873, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -13.778238296508789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03475959226489067, + "rewards_train/margins": -0.013185761868953705, + "rewards_train/rejected": -0.021573830395936966, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.335115909576416, + "logps_train/ref_chosen": -0.96484375, + "logps_train/ref_rejected": -0.96484375, + "logps_train/rejected": -2.3243842124938965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13702721893787384, + "rewards_train/margins": -0.0010731667280197144, + "rewards_train/rejected": -0.13595405220985413, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -13.983553886413574, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -13.912836074829102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007894611917436123, + "rewards_train/margins": 0.005428219446912408, + "rewards_train/rejected": 0.002466392470523715, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -271.1795349121094, + "logps_train/ref_chosen": -274.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -154.14283752441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28204652667045593, + "rewards_train/margins": 0.3963302820920944, + "rewards_train/rejected": -0.11428375542163849, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -126.59214782714844, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -159.81619262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09078522026538849, + "rewards_train/margins": 0.172404482960701, + "rewards_train/rejected": -0.0816192626953125, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -119.25340270996094, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -107.97034454345703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02465972863137722, + "rewards_train/margins": -0.028305819258093834, + "rewards_train/rejected": 0.052965547889471054, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -26.11642074584961, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -23.14189338684082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.000857925449963659, + "rewards_train/margins": 0.015047264692839235, + "rewards_train/rejected": -0.014189339242875576, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -9.728279113769531, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -5.646563529968262, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01032791193574667, + "rewards_train/margins": 0.019953441806137562, + "rewards_train/rejected": -0.03028135374188423, + "step": 166 + }, + { + "epoch": 0.05, + "logps_train/chosen": -119.67349243164062, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -116.23043060302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2673492431640625, + "rewards_train/margins": 0.0056938230991363525, + "rewards_train/rejected": -0.27304306626319885, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -86.82528686523438, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -77.42237854003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11747131496667862, + "rewards_train/margins": 0.20970916748046875, + "rewards_train/rejected": -0.09223785251379013, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -109.05803680419922, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -132.33901977539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0058036805130541325, + "rewards_train/margins": 0.22809829702600837, + "rewards_train/rejected": -0.2339019775390625, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.468748569488525, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -4.389774799346924, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03125014528632164, + "rewards_train/margins": -0.0047723762691020966, + "rewards_train/rejected": 0.03602252155542374, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -24.960073471069336, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -44.075382232666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.033507347106933594, + "rewards_train/margins": 0.07403087615966797, + "rewards_train/rejected": -0.10753822326660156, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -70.52052307128906, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -72.38711547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04794769361615181, + "rewards_train/margins": 0.036659241653978825, + "rewards_train/rejected": 0.011288451962172985, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -4.446146011352539, + "logps_train/ref_chosen": -0.1806640625, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -11.446080207824707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42654821276664734, + "rewards_train/margins": 0.37430980801582336, + "rewards_train/rejected": -0.8008580207824707, + "step": 167 + }, + { + "epoch": 0.05, + "logps_train/chosen": -107.17105102539062, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -158.88368225097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03289489820599556, + "rewards_train/margins": 0.3212631233036518, + "rewards_train/rejected": -0.28836822509765625, + "step": 167 + }, + { + "epoch": 0.05, + "learning_rate": 1.9983828051792885e-06, + "loss": 0.6406, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -92.45835876464844, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -134.2730255126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05416412279009819, + "rewards_train/margins": 0.28146667405962944, + "rewards_train/rejected": -0.22730255126953125, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -86.48869323730469, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -138.13134765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0011306762462481856, + "rewards_train/margins": 0.11426544038113207, + "rewards_train/rejected": -0.11313476413488388, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -50.03853225708008, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -124.04267883300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17114678025245667, + "rewards_train/margins": 0.675414651632309, + "rewards_train/rejected": -0.5042678713798523, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -17.607641220092773, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -18.022281646728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0017358780605718493, + "rewards_train/margins": 0.04146404273342341, + "rewards_train/rejected": -0.03972816467285156, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -32.27113723754883, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -36.8918342590332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04788627848029137, + "rewards_train/margins": 0.06206970475614071, + "rewards_train/rejected": -0.014183426275849342, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -26.323135375976562, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -4.587780475616455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05518646165728569, + "rewards_train/margins": 0.08583950996398926, + "rewards_train/rejected": -0.030653048306703568, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -1.9995330572128296, + "logps_train/ref_chosen": -1.9296875, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -9.946959495544434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0069845556281507015, + "rewards_train/margins": 0.018961394671350718, + "rewards_train/rejected": -0.02594595029950142, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -144.80679321289062, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -159.0503692626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11932068318128586, + "rewards_train/margins": 0.12435760954394937, + "rewards_train/rejected": -0.0050369263626635075, + "step": 168 + }, + { + "epoch": 0.05, + "logps_train/chosen": -4.069393634796143, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -1.4063488245010376, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010064363479614258, + "rewards_train/margins": 0.008695518597960472, + "rewards_train/rejected": -0.01875988207757473, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -74.91920471191406, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -98.28833770751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15807953476905823, + "rewards_train/margins": 0.43691331148147583, + "rewards_train/rejected": -0.2788337767124176, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -88.88269805908203, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -116.74041748046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.038269806653261185, + "rewards_train/margins": -0.014228058978915215, + "rewards_train/rejected": -0.02404174767434597, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -16.666776657104492, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -6.927389144897461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03332233428955078, + "rewards_train/margins": 0.02293624822050333, + "rewards_train/rejected": 0.010386086069047451, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -28.968799591064453, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -26.210674285888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003120040986686945, + "rewards_train/margins": 0.024187470320612192, + "rewards_train/rejected": -0.021067429333925247, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -93.91300964355469, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -90.60491943359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04130096361041069, + "rewards_train/margins": -0.030809019692242146, + "rewards_train/rejected": -0.010491943918168545, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -56.706199645996094, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -56.43680191040039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.054380036890506744, + "rewards_train/margins": -0.026939772069454193, + "rewards_train/rejected": 0.08131980895996094, + "step": 169 + }, + { + "epoch": 0.05, + "logps_train/chosen": -100.32084655761719, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -104.25889587402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16791534423828125, + "rewards_train/margins": 0.29380492866039276, + "rewards_train/rejected": -0.1258895844221115, + "step": 169 + }, + { + "epoch": 0.05, + "learning_rate": 1.9982289153773643e-06, + "loss": 0.6328, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -105.59971618652344, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -109.05001831054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14002838730812073, + "rewards_train/margins": 0.3950302302837372, + "rewards_train/rejected": -0.25500184297561646, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -122.89231872558594, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -105.93109893798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16076813638210297, + "rewards_train/margins": 0.05387803167104721, + "rewards_train/rejected": 0.10689010471105576, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -91.5333480834961, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -91.55911254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.046665191650390625, + "rewards_train/margins": 0.002576444298028946, + "rewards_train/rejected": 0.04408874735236168, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.4976093769073486, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -4.77480936050415, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017426563426852226, + "rewards_train/margins": 0.013657499337568879, + "rewards_train/rejected": 0.003769064089283347, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -23.462217330932617, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -43.13447570800781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04622173309326172, + "rewards_train/margins": -0.0077741630375385284, + "rewards_train/rejected": -0.03844757005572319, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -25.1270751953125, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -43.61996078491211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02479248121380806, + "rewards_train/margins": 0.03678855951875448, + "rewards_train/rejected": -0.011996078304946423, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.10825252532959, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -24.938297271728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017075253650546074, + "rewards_train/margins": 0.014254475012421608, + "rewards_train/rejected": -0.03132972866296768, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -26.92340087890625, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -13.363301277160645, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01734008826315403, + "rewards_train/margins": 0.04399004019796848, + "rewards_train/rejected": -0.06133012846112251, + "step": 170 + }, + { + "epoch": 0.05, + "logps_train/chosen": -87.12641906738281, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -18.247114181518555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03735809400677681, + "rewards_train/margins": 0.1120695136487484, + "rewards_train/rejected": -0.07471141964197159, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -13.024253845214844, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -1.8778787851333618, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028824616223573685, + "rewards_train/margins": 0.044737495481967926, + "rewards_train/rejected": -0.01591287925839424, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.067228317260742, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -4.849130630493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009847831912338734, + "rewards_train/margins": 0.012565231882035732, + "rewards_train/rejected": -0.022413063794374466, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -39.90062713623047, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -77.07697296142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015062713995575905, + "rewards_train/margins": 0.1926345881074667, + "rewards_train/rejected": -0.2076973021030426, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -152.73089599609375, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -136.89276123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12691040337085724, + "rewards_train/margins": 0.11618652660399675, + "rewards_train/rejected": 0.010723876766860485, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -102.45110321044922, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -136.64964294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2048896849155426, + "rewards_train/margins": 0.26985397934913635, + "rewards_train/rejected": -0.06496429443359375, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -51.70854949951172, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -72.157470703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.029145050793886185, + "rewards_train/margins": -0.055107880383729935, + "rewards_train/rejected": 0.08425293117761612, + "step": 171 + }, + { + "epoch": 0.05, + "logps_train/chosen": -11.159427642822266, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -27.344398498535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021557236090302467, + "rewards_train/margins": 0.04349708557128906, + "rewards_train/rejected": -0.021939849480986595, + "step": 171 + }, + { + "epoch": 0.05, + "learning_rate": 1.998068039031396e-06, + "loss": 0.6553, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -127.02342224121094, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -91.43057250976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10234222561120987, + "rewards_train/margins": -0.00928497314453125, + "rewards_train/rejected": -0.09305725246667862, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -9.601081848144531, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -8.787765502929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.052391815930604935, + "rewards_train/margins": 0.04991836613044143, + "rewards_train/rejected": 0.0024734498001635075, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -9.625226974487305, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -44.92008972167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07502269744873047, + "rewards_train/margins": 0.016986273229122162, + "rewards_train/rejected": -0.09200897067785263, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.42141580581665, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -0.56640625, + "logps_train/rejected": -0.6070871949195862, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0077665806747972965, + "rewards_train/margins": -0.003698485903441906, + "rewards_train/rejected": -0.0040680947713553905, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -109.2378158569336, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -132.4905548095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0737815871834755, + "rewards_train/margins": 0.2752739116549492, + "rewards_train/rejected": -0.3490554988384247, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -102.395751953125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -63.95907974243164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010424804873764515, + "rewards_train/margins": 0.006332778837531805, + "rewards_train/rejected": 0.00409202603623271, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.7877066135406494, + "logps_train/ref_chosen": -2.328125, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -2.3154890537261963, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14595817029476166, + "rewards_train/margins": -0.1972217671573162, + "rewards_train/rejected": 0.05126359686255455, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -9.55830192565918, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -63.65232849121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00041980744572356343, + "rewards_train/margins": 0.0156526563805528, + "rewards_train/rejected": -0.015232848934829235, + "step": 172 + }, + { + "epoch": 0.05, + "logps_train/chosen": -53.879417419433594, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -53.25834655761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13705825805664062, + "rewards_train/margins": 0.1378929138300009, + "rewards_train/rejected": -0.0008346557733602822, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -118.85494995117188, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -145.51058959960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.014505005441606045, + "rewards_train/margins": -0.034436036832630634, + "rewards_train/rejected": 0.04894104227423668, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -86.11431884765625, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -131.28501892089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.061431884765625, + "rewards_train/margins": 0.31707000732421875, + "rewards_train/rejected": -0.37850189208984375, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.5154895782470703, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -2.2916781902313232, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02498645894229412, + "rewards_train/margins": -0.050506141036748886, + "rewards_train/rejected": 0.025519682094454765, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -106.33941650390625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -158.20956420898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.133941650390625, + "rewards_train/margins": -0.0129852294921875, + "rewards_train/rejected": -0.1209564208984375, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -10.817024230957031, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -5.290135860443115, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.018297577276825905, + "rewards_train/margins": -0.002688836306333542, + "rewards_train/rejected": 0.020986413583159447, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -115.05291748046875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -149.96414184570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.205291748046875, + "rewards_train/margins": 0.2911224365234375, + "rewards_train/rejected": -0.4964141845703125, + "step": 173 + }, + { + "epoch": 0.05, + "logps_train/chosen": -106.62315368652344, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -106.67460632324219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16231536865234375, + "rewards_train/margins": -0.09485473483800888, + "rewards_train/rejected": -0.06746063381433487, + "step": 173 + }, + { + "epoch": 0.05, + "learning_rate": 1.997900177267347e-06, + "loss": 0.6735, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.400090217590332, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -11.652364730834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06624098122119904, + "rewards_train/margins": 0.10647745430469513, + "rewards_train/rejected": -0.040236473083496094, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -23.995454788208008, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -16.841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012954520992934704, + "rewards_train/margins": 0.08463420998305082, + "rewards_train/rejected": -0.07167968899011612, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -31.76980209350586, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -14.80366039276123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03948020935058594, + "rewards_train/margins": -0.034114169888198376, + "rewards_train/rejected": -0.005366039462387562, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -88.50749206542969, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -96.02098083496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04925079271197319, + "rewards_train/margins": 0.101348876953125, + "rewards_train/rejected": -0.05209808424115181, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.6831274032592773, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -8.235834121704102, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0035622597206383944, + "rewards_train/margins": -0.010354328667744994, + "rewards_train/rejected": 0.013916588388383389, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -12.941494941711426, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -5.965339660644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01835050620138645, + "rewards_train/margins": 0.008634472265839577, + "rewards_train/rejected": 0.009716033935546875, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -119.78752899169922, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -158.74703979492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07124710083007812, + "rewards_train/margins": 0.2459510862827301, + "rewards_train/rejected": -0.17470398545265198, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -134.4978790283203, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -176.74664306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.35021209716796875, + "rewards_train/margins": 0.3248764034360647, + "rewards_train/rejected": 0.02533569373190403, + "step": 174 + }, + { + "epoch": 0.05, + "logps_train/chosen": -155.08106994628906, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -97.86479949951172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10810699313879013, + "rewards_train/margins": -0.021627038717269897, + "rewards_train/rejected": -0.08647995442152023, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.2737326622009277, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -2.71875, + "logps_train/rejected": -2.530928134918213, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011748266406357288, + "rewards_train/margins": -0.03053045365959406, + "rewards_train/rejected": 0.01878218725323677, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -12.725231170654297, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -14.241393089294434, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010023117065429688, + "rewards_train/margins": -0.010883808135986328, + "rewards_train/rejected": 0.0008606910705566406, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -82.19804382324219, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -85.12067413330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03019561804831028, + "rewards_train/margins": 0.04226303193718195, + "rewards_train/rejected": -0.01206741388887167, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -53.21367645263672, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -74.51858520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.103632353246212, + "rewards_train/margins": 0.055490873754024506, + "rewards_train/rejected": 0.0481414794921875, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -9.4254150390625, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -13.17550277709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007458496373146772, + "rewards_train/margins": 0.05000877333804965, + "rewards_train/rejected": -0.04255027696490288, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.228282928466797, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -8.799232482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0021717071067541838, + "rewards_train/margins": 0.0008449553279206157, + "rewards_train/rejected": 0.001326751778833568, + "step": 175 + }, + { + "epoch": 0.05, + "logps_train/chosen": -99.6031494140625, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -214.0, + "logps_train/rejected": -218.53294372558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11031494289636612, + "rewards_train/margins": 0.34297942370176315, + "rewards_train/rejected": -0.4532943665981293, + "step": 175 + }, + { + "epoch": 0.05, + "learning_rate": 1.9977253312600713e-06, + "loss": 0.6564, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -135.34913635253906, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -115.70187377929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1349136382341385, + "rewards_train/margins": -0.11472626030445099, + "rewards_train/rejected": -0.0201873779296875, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -43.30031204223633, + "logps_train/ref_chosen": -43.75, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -72.09102630615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04496879503130913, + "rewards_train/margins": 0.10407142713665962, + "rewards_train/rejected": -0.059102632105350494, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -101.08409118652344, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -113.60504913330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04159088060259819, + "rewards_train/margins": 0.40209580585360527, + "rewards_train/rejected": -0.3605049252510071, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -77.04688262939453, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -48.7984733581543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.054688263684511185, + "rewards_train/margins": 0.05015907064080238, + "rewards_train/rejected": -0.10484733432531357, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -52.102134704589844, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -52.212318420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1147865280508995, + "rewards_train/margins": 0.011018365621566772, + "rewards_train/rejected": 0.10376816242933273, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -147.49465942382812, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -72.18341827392578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14946594834327698, + "rewards_train/margins": -0.08112411946058273, + "rewards_train/rejected": -0.06834182888269424, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -76.416259765625, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -99.38248443603516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0416259765625, + "rewards_train/margins": -0.003377530723810196, + "rewards_train/rejected": -0.038248445838689804, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -10.48159122467041, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -26.892873764038086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008090877905488014, + "rewards_train/margins": 0.10987825877964497, + "rewards_train/rejected": -0.10178738087415695, + "step": 176 + }, + { + "epoch": 0.05, + "logps_train/chosen": -18.396398544311523, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -33.76443099975586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0646398589015007, + "rewards_train/margins": -0.038196759298443794, + "rewards_train/rejected": -0.026443099603056908, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -124.28970336914062, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -124.74518585205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.37897035479545593, + "rewards_train/margins": -0.05445176362991333, + "rewards_train/rejected": -0.3245185911655426, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.766711711883545, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -8.345537185668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029578829184174538, + "rewards_train/margins": 0.06725754775106907, + "rewards_train/rejected": -0.03767871856689453, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -159.7810516357422, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -113.5999984741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22189484536647797, + "rewards_train/margins": 0.2818946950137615, + "rewards_train/rejected": -0.059999849647283554, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -67.698974609375, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -65.54925537109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06989746540784836, + "rewards_train/margins": -0.06497192801907659, + "rewards_train/rejected": -0.004925537388771772, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -129.67633056640625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -81.77786254882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06763305515050888, + "rewards_train/margins": -0.03984680026769638, + "rewards_train/rejected": -0.0277862548828125, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -21.144142150878906, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -21.22897720336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014414215460419655, + "rewards_train/margins": 0.04598350636661053, + "rewards_train/rejected": -0.06039772182703018, + "step": 177 + }, + { + "epoch": 0.05, + "logps_train/chosen": -136.02806091308594, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -177.55889892578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20280610024929047, + "rewards_train/margins": -0.04691620171070099, + "rewards_train/rejected": -0.15588989853858948, + "step": 177 + }, + { + "epoch": 0.05, + "learning_rate": 1.9975435022333063e-06, + "loss": 0.6759, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -80.47537231445312, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -134.5682830810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1024627685546875, + "rewards_train/margins": 0.4592910706996918, + "rewards_train/rejected": -0.3568283021450043, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -17.454500198364258, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -11.49384880065918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05454998090863228, + "rewards_train/margins": 0.0726848617196083, + "rewards_train/rejected": -0.01813488081097603, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -172.947021484375, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -151.01174926757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20529785752296448, + "rewards_train/margins": 0.20647278428077698, + "rewards_train/rejected": -0.0011749267578125, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -0.21730345487594604, + "logps_train/ref_chosen": -0.1865234375, + "logps_train/ref_rejected": -0.1865234375, + "logps_train/rejected": -0.2185930609703064, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0030780017841607332, + "rewards_train/margins": 0.00012896070256829262, + "rewards_train/rejected": -0.003206962486729026, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -46.37289047241211, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -80.82275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08728905022144318, + "rewards_train/margins": 0.044986337423324585, + "rewards_train/rejected": -0.13227538764476776, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -51.8677978515625, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -12.334196090698242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06322021782398224, + "rewards_train/margins": 0.14663983136415482, + "rewards_train/rejected": -0.08341961354017258, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -57.35658264160156, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -41.755645751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06065826490521431, + "rewards_train/margins": 0.03990631178021431, + "rewards_train/rejected": -0.10056457668542862, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -4.620863914489746, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -8.045069694519043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01833639107644558, + "rewards_train/margins": 0.03617057763040066, + "rewards_train/rejected": -0.05450696870684624, + "step": 178 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.031109809875488, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -20.346797943115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03438901901245117, + "rewards_train/margins": 0.0565688144415617, + "rewards_train/rejected": -0.022179795429110527, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -29.160249710083008, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -30.5, + "logps_train/rejected": -30.806791305541992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02147503010928631, + "rewards_train/margins": 0.05215416103601456, + "rewards_train/rejected": -0.03067913092672825, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.172942638397217, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -2.015625, + "logps_train/rejected": -2.0076866149902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0485442653298378, + "rewards_train/margins": -0.049338103854097426, + "rewards_train/rejected": 0.0007938385242596269, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.845963716506958, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -6.570653915405273, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0314713716506958, + "rewards_train/margins": -0.030655980110168457, + "rewards_train/rejected": -0.0008153915405273438, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -74.80306243896484, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -27.81349754333496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21969376504421234, + "rewards_train/margins": 0.31354352086782455, + "rewards_train/rejected": -0.09384975582361221, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -100.63155364990234, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -100.91845703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013155365362763405, + "rewards_train/margins": -0.021309662610292435, + "rewards_train/rejected": 0.00815429724752903, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -151.68382263183594, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -164.0894012451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03161773830652237, + "rewards_train/margins": 0.24055787175893784, + "rewards_train/rejected": -0.20894013345241547, + "step": 179 + }, + { + "epoch": 0.05, + "logps_train/chosen": -47.1766357421875, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -20.141429901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007336426060646772, + "rewards_train/margins": 0.008979416219517589, + "rewards_train/rejected": -0.0016429901588708162, + "step": 179 + }, + { + "epoch": 0.05, + "learning_rate": 1.997354691459662e-06, + "loss": 0.6477, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.393516540527344, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -24.001026153564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07060165703296661, + "rewards_train/margins": -0.07049904167797649, + "rewards_train/rejected": -0.00010261535499012098, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.2482194900512695, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -3.4779868125915527, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.021696949377655983, + "rewards_train/margins": -0.02389826811850071, + "rewards_train/rejected": 0.0022013187408447266, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -82.11555480957031, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -78.7475357055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11155547946691513, + "rewards_train/margins": 0.1631980910897255, + "rewards_train/rejected": -0.2747535705566406, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -138.53399658203125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -153.531494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3533996641635895, + "rewards_train/margins": 0.19974973797798157, + "rewards_train/rejected": -0.553149402141571, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.561427116394043, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -3.205335855484009, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018642712384462357, + "rewards_train/margins": -0.012171626556664705, + "rewards_train/rejected": -0.006471085827797651, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -76.8392333984375, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -97.76599884033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11607666313648224, + "rewards_train/margins": 0.24267655611038208, + "rewards_train/rejected": -0.12659989297389984, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -12.353653907775879, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -22.08899688720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04588460922241211, + "rewards_train/margins": 0.054784297943115234, + "rewards_train/rejected": -0.008899688720703125, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -20.850175857543945, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -9.737753868103027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02251758612692356, + "rewards_train/margins": 0.05750780366361141, + "rewards_train/rejected": -0.08002538979053497, + "step": 180 + }, + { + "epoch": 0.05, + "logps_train/chosen": -73.40852355957031, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -112.4771728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15914765000343323, + "rewards_train/margins": 0.5068649351596832, + "rewards_train/rejected": -0.34771728515625, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -104.1397933959961, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -128.78729248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23602066934108734, + "rewards_train/margins": 0.4147499203681946, + "rewards_train/rejected": -0.17872925102710724, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.1443376541137695, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -7.295281887054443, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05818376690149307, + "rewards_train/margins": -0.03803057782351971, + "rewards_train/rejected": -0.020153189077973366, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -17.554821014404297, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -69.55724334716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.017982101067900658, + "rewards_train/margins": -0.06225776858627796, + "rewards_train/rejected": 0.044275667518377304, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -28.31705665588379, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -13.437583923339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019205665215849876, + "rewards_train/margins": 0.06830273009836674, + "rewards_train/rejected": -0.08750839531421661, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -66.11204528808594, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -83.08258056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03879547119140625, + "rewards_train/margins": 0.09705352783203125, + "rewards_train/rejected": -0.058258056640625, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -106.963134765625, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -169.63201904296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05368652567267418, + "rewards_train/margins": -0.08311157301068306, + "rewards_train/rejected": 0.13679809868335724, + "step": 181 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.4397530555725098, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -22.23546028137207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009600305929780006, + "rewards_train/margins": 0.013945722952485085, + "rewards_train/rejected": -0.02354602888226509, + "step": 181 + }, + { + "epoch": 0.05, + "learning_rate": 1.997158900260614e-06, + "loss": 0.6493, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -112.71609497070312, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -153.93020629882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.471609503030777, + "rewards_train/margins": -0.07858887314796448, + "rewards_train/rejected": -0.3930206298828125, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -71.38368225097656, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -33.86791229248047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13836823403835297, + "rewards_train/margins": -0.10157700255513191, + "rewards_train/rejected": -0.036791231483221054, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -6.754178524017334, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -9.563028335571289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01833214797079563, + "rewards_train/margins": 0.08088498003780842, + "rewards_train/rejected": -0.06255283206701279, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -85.7320556640625, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -108.54771423339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02679443359375, + "rewards_train/margins": 0.18156586587429047, + "rewards_train/rejected": -0.15477143228054047, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -9.28075885772705, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -33.964054107666016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02182588540017605, + "rewards_train/margins": -0.025420474587008357, + "rewards_train/rejected": 0.0035945891868323088, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -95.47029876708984, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -125.5076904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14702987670898438, + "rewards_train/margins": 0.0037391632795333862, + "rewards_train/rejected": -0.15076903998851776, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -58.2563591003418, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -54.430538177490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14936409890651703, + "rewards_train/margins": 0.36741791665554047, + "rewards_train/rejected": -0.21805381774902344, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.1689491271972656, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -10.490944862365723, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0049800872802734375, + "rewards_train/margins": 0.04782457277178764, + "rewards_train/rejected": -0.042844485491514206, + "step": 182 + }, + { + "epoch": 0.05, + "logps_train/chosen": -4.2704057693481445, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -5.889193534851074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008290576748549938, + "rewards_train/margins": 0.008753777481615543, + "rewards_train/rejected": -0.01704435423016548, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -1.1377081871032715, + "logps_train/ref_chosen": -0.98828125, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -2.1858344078063965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014942693524062634, + "rewards_train/margins": -0.019796752836555243, + "rewards_train/rejected": 0.004854059312492609, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -14.913436889648438, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -12.315872192382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05240631103515625, + "rewards_train/margins": 0.02149352990090847, + "rewards_train/rejected": 0.03091278113424778, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -57.434024810791016, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -88.79308319091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03159752115607262, + "rewards_train/margins": 0.010905839502811432, + "rewards_train/rejected": 0.020691681653261185, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -126.84561157226562, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -156.54257202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0345611572265625, + "rewards_train/margins": 0.519696056842804, + "rewards_train/rejected": -0.5542572140693665, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.405015707015991, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -1.4296875, + "logps_train/rejected": -1.3889092206954956, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02487657032907009, + "rewards_train/margins": -0.028954398352652788, + "rewards_train/rejected": 0.004077828023582697, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -95.45448303222656, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -106.91482543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2545517086982727, + "rewards_train/margins": 0.14603424817323685, + "rewards_train/rejected": 0.10851746052503586, + "step": 183 + }, + { + "epoch": 0.05, + "logps_train/chosen": -15.187743186950684, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -18.170888900756836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09377431869506836, + "rewards_train/margins": -0.014185428619384766, + "rewards_train/rejected": -0.0795888900756836, + "step": 183 + }, + { + "epoch": 0.05, + "learning_rate": 1.9969561300064925e-06, + "loss": 0.6621, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -6.351419448852539, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -21.82685661315918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.019516944885253906, + "rewards_train/margins": -0.07433128356933594, + "rewards_train/rejected": 0.05481433868408203, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -6.330157279968262, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -10.729418754577637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029484272003173828, + "rewards_train/margins": 0.0024261474609375, + "rewards_train/rejected": 0.027058124542236328, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -122.36711883544922, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -116.15982055664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11328811943531036, + "rewards_train/margins": 0.5292701870203018, + "rewards_train/rejected": -0.41598206758499146, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -68.8996810913086, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -82.39225769042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11003189533948898, + "rewards_train/margins": 0.14925766363739967, + "rewards_train/rejected": -0.03922576829791069, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -87.95045471191406, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -115.7020492553711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05495452880859375, + "rewards_train/margins": 0.5251594483852386, + "rewards_train/rejected": -0.4702049195766449, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.0256762504577637, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -4.124544620513916, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03538012504577637, + "rewards_train/margins": -0.0057381633669137955, + "rewards_train/rejected": -0.02964196167886257, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -20.08743667602539, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -16.221092224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04125633463263512, + "rewards_train/margins": 0.06336555816233158, + "rewards_train/rejected": -0.022109223529696465, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -29.546689987182617, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -29.291156768798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007831001654267311, + "rewards_train/margins": 0.0619466807693243, + "rewards_train/rejected": -0.05411567911505699, + "step": 184 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.1776483058929443, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -7.55073356628418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009952330961823463, + "rewards_train/margins": -0.042378975078463554, + "rewards_train/rejected": 0.03242664411664009, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -106.71585083007812, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -131.97442626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02841491810977459, + "rewards_train/margins": 0.4758575391024351, + "rewards_train/rejected": -0.4474426209926605, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -48.45182800292969, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -54.5, + "logps_train/rejected": -54.489925384521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07981719821691513, + "rewards_train/margins": 0.0788097366457805, + "rewards_train/rejected": 0.0010074615711346269, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -88.06011199951172, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -87.9949722290039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.093988798558712, + "rewards_train/margins": 0.24348602443933487, + "rewards_train/rejected": -0.14949722588062286, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -72.19338989257812, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -103.67362976074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.280661016702652, + "rewards_train/margins": 0.24802399054169655, + "rewards_train/rejected": 0.03263702616095543, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.369078159332275, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -5.755361557006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04121718555688858, + "rewards_train/margins": 0.07300334051251411, + "rewards_train/rejected": -0.031786154955625534, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -96.41970825195312, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -83.36986541748047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.10802917927503586, + "rewards_train/margins": -0.05498427897691727, + "rewards_train/rejected": 0.16301345825195312, + "step": 185 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.936052083969116, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -17.7694091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03110520914196968, + "rewards_train/margins": 0.03333571180701256, + "rewards_train/rejected": -0.06444092094898224, + "step": 185 + }, + { + "epoch": 0.05, + "learning_rate": 1.9967463821164744e-06, + "loss": 0.6286, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -53.37651062011719, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -48.97585678100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03734893724322319, + "rewards_train/margins": 0.03493461525067687, + "rewards_train/rejected": 0.00241432199254632, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -14.713327407836914, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -29.52045249938965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03383274003863335, + "rewards_train/margins": 0.018212512135505676, + "rewards_train/rejected": -0.05204525217413902, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -166.9698944091797, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -159.218994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10301055759191513, + "rewards_train/margins": 0.12490997277200222, + "rewards_train/rejected": -0.02189941518008709, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -18.819658279418945, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -27.003219604492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0305341724306345, + "rewards_train/margins": -0.006643867120146751, + "rewards_train/rejected": 0.03717803955078125, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -60.085533142089844, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -112.54074096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0664466843008995, + "rewards_train/margins": 0.12052078172564507, + "rewards_train/rejected": -0.05407409742474556, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -80.34331512451172, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -62.16832733154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08433151245117188, + "rewards_train/margins": 0.08250121772289276, + "rewards_train/rejected": -0.16683273017406464, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -6.2718400955200195, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -10.273384094238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.027184009552001953, + "rewards_train/margins": 0.00015440024435520172, + "rewards_train/rejected": -0.027338409796357155, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -144.43832397460938, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -54.71181869506836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14383240044116974, + "rewards_train/margins": -0.0476505309343338, + "rewards_train/rejected": -0.09618186950683594, + "step": 186 + }, + { + "epoch": 0.05, + "logps_train/chosen": -41.490962982177734, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -41.44398880004883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.000903701817151159, + "rewards_train/margins": -0.004697418364230543, + "rewards_train/rejected": 0.005601120181381702, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.992797613143921, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -6.067100524902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008654761128127575, + "rewards_train/margins": -0.05819471087306738, + "rewards_train/rejected": 0.049539949744939804, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -75.45378875732422, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -65.92155456542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.054621126502752304, + "rewards_train/margins": 0.32177659496665, + "rewards_train/rejected": -0.2671554684638977, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -145.2593231201172, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -128.26043701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12593232095241547, + "rewards_train/margins": 0.05011138319969177, + "rewards_train/rejected": -0.17604370415210724, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -56.88243103027344, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -29.980113983154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01175689697265625, + "rewards_train/margins": 0.09726829826831818, + "rewards_train/rejected": -0.08551140129566193, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.260597229003906, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -4.820550918579102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029184723272919655, + "rewards_train/margins": 0.01537037082016468, + "rewards_train/rejected": -0.044555094093084335, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -23.885448455810547, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -28.077442169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07604485005140305, + "rewards_train/margins": 0.09419936686754227, + "rewards_train/rejected": -0.1702442169189453, + "step": 187 + }, + { + "epoch": 0.05, + "logps_train/chosen": -124.39459228515625, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -135.2205810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.289459228515625, + "rewards_train/margins": 0.2325989007949829, + "rewards_train/rejected": -0.5220581293106079, + "step": 187 + }, + { + "epoch": 0.05, + "learning_rate": 1.9965296580585733e-06, + "loss": 0.6621, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -0.790400505065918, + "logps_train/ref_chosen": -0.92578125, + "logps_train/ref_rejected": -0.92578125, + "logps_train/rejected": -0.7891103029251099, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.013538074679672718, + "rewards_train/margins": -0.0001290198415517807, + "rewards_train/rejected": 0.013667094521224499, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -10.206870079040527, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -16.60598373413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00818700809031725, + "rewards_train/margins": 0.014911365695297718, + "rewards_train/rejected": -0.023098373785614967, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -117.89181518554688, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -112.56767272949219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18918152153491974, + "rewards_train/margins": -0.03241424262523651, + "rewards_train/rejected": -0.15676727890968323, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -90.5438232421875, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -114.87725830078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19561767578125, + "rewards_train/margins": 0.633343517780304, + "rewards_train/rejected": -0.43772584199905396, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.9207870960235596, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -5.402257919311523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011046290397644043, + "rewards_train/margins": 0.03877208195626736, + "rewards_train/rejected": -0.027725791558623314, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -109.08716583251953, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -116.52593994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.041283417493104935, + "rewards_train/margins": 0.5438774116337299, + "rewards_train/rejected": -0.502593994140625, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.428025007247925, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -12.10274887084961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00875999964773655, + "rewards_train/margins": 0.025284886360168457, + "rewards_train/rejected": -0.016524886712431908, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -15.74703598022461, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -21.347034454345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03779640421271324, + "rewards_train/margins": 0.17249984666705132, + "rewards_train/rejected": -0.13470344245433807, + "step": 188 + }, + { + "epoch": 0.05, + "logps_train/chosen": -110.79730224609375, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -120.55235290527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02026977576315403, + "rewards_train/margins": 0.5255050901323557, + "rewards_train/rejected": -0.5052353143692017, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -25.670639038085938, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -68.40013122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04206390306353569, + "rewards_train/margins": 0.09794922545552254, + "rewards_train/rejected": -0.14001312851905823, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -123.52525329589844, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -146.52056884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14747467637062073, + "rewards_train/margins": 0.5995315611362457, + "rewards_train/rejected": -0.452056884765625, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -7.786448001861572, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -1.8046875, + "logps_train/rejected": -0.8381778001785278, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6372385621070862, + "rewards_train/margins": -0.7338895350694656, + "rewards_train/rejected": 0.09665097296237946, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -102.61937713623047, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -140.85244750976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16193771362304688, + "rewards_train/margins": 0.2233070433139801, + "rewards_train/rejected": -0.385244756937027, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -87.8746566772461, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -107.36196899414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06253433227539062, + "rewards_train/margins": 0.09873123094439507, + "rewards_train/rejected": -0.03619689866900444, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -28.06241226196289, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.357830047607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018758773803710938, + "rewards_train/margins": 0.026416778564453125, + "rewards_train/rejected": -0.0076580047607421875, + "step": 189 + }, + { + "epoch": 0.05, + "logps_train/chosen": -103.42855834960938, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -192.89315795898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04285583645105362, + "rewards_train/margins": 0.3464599773287773, + "rewards_train/rejected": -0.38931581377983093, + "step": 189 + }, + { + "epoch": 0.05, + "learning_rate": 1.996305959349627e-06, + "loss": 0.6286, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -166.55007934570312, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -151.66896057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24499206244945526, + "rewards_train/margins": 0.1118881106376648, + "rewards_train/rejected": 0.13310395181179047, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -19.85715103149414, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -22.273591995239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03928489610552788, + "rewards_train/margins": 0.04164409567601979, + "rewards_train/rejected": -0.00235919957049191, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -54.030181884765625, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -68.88108825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12198181450366974, + "rewards_train/margins": 0.1600906401872635, + "rewards_train/rejected": -0.03810882568359375, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -164.56735229492188, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -132.2477569580078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.456735223531723, + "rewards_train/margins": -0.2319595217704773, + "rewards_train/rejected": -0.22477570176124573, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -136.25311279296875, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -129.36904907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07468872517347336, + "rewards_train/margins": 0.2115936353802681, + "rewards_train/rejected": -0.13690491020679474, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -107.00141906738281, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -71.66844940185547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3001419007778168, + "rewards_train/margins": -0.3832969591021538, + "rewards_train/rejected": 0.083155058324337, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.7070350646972656, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -17.36454200744629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012891006655991077, + "rewards_train/margins": 0.011063194833695889, + "rewards_train/rejected": -0.023954201489686966, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.8842735290527344, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -8.0606689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005322647280991077, + "rewards_train/margins": 0.06451454106718302, + "rewards_train/rejected": -0.05919189378619194, + "step": 190 + }, + { + "epoch": 0.05, + "logps_train/chosen": -15.12475299835205, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -69.69892883300781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04372530058026314, + "rewards_train/margins": -0.2238324172794819, + "rewards_train/rejected": 0.18010711669921875, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -33.61187744140625, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -48.5, + "logps_train/rejected": -48.77290725708008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11381226032972336, + "rewards_train/margins": 0.1411029864102602, + "rewards_train/rejected": -0.027290726080536842, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -91.2908935546875, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -137.3277587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22091065347194672, + "rewards_train/margins": 0.1536865308880806, + "rewards_train/rejected": 0.06722412258386612, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -98.7681884765625, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -122.13495635986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07318115234375, + "rewards_train/margins": 0.6366767883300781, + "rewards_train/rejected": -0.5634956359863281, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.0483617782592773, + "logps_train/ref_chosen": -1.8046875, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -7.803361415863037, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.024367427453398705, + "rewards_train/margins": 0.052843714132905006, + "rewards_train/rejected": -0.07721114158630371, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.041128635406494, + "logps_train/ref_chosen": -3.203125, + "logps_train/ref_rejected": -1.796875, + "logps_train/rejected": -1.8762829303741455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016199637204408646, + "rewards_train/margins": 0.024140430614352226, + "rewards_train/rejected": -0.00794079340994358, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -2.7346858978271484, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -1.5703125, + "logps_train/rejected": -1.84871506690979, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010968590155243874, + "rewards_train/margins": 0.01687166653573513, + "rewards_train/rejected": -0.027840256690979004, + "step": 191 + }, + { + "epoch": 0.05, + "logps_train/chosen": -97.50405883789062, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -132.719482421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0995941162109375, + "rewards_train/margins": 0.17154236137866974, + "rewards_train/rejected": -0.07194824516773224, + "step": 191 + }, + { + "epoch": 0.05, + "learning_rate": 1.9960752875552895e-06, + "loss": 0.6694, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -22.518836975097656, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -5.948480129241943, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07311630249023438, + "rewards_train/margins": 0.061714314855635166, + "rewards_train/rejected": 0.011401987634599209, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -150.00059509277344, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -156.14004516601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10005950927734375, + "rewards_train/margins": 0.11394500732421875, + "rewards_train/rejected": -0.2140045166015625, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -94.6429443359375, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -121.65718841552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06429443508386612, + "rewards_train/margins": 0.051424406468868256, + "rewards_train/rejected": -0.11571884155273438, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -77.96144104003906, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -97.2620849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04614410549402237, + "rewards_train/margins": 0.2800643965601921, + "rewards_train/rejected": -0.3262085020542145, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -17.15085220336914, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -11.668315887451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009914780035614967, + "rewards_train/margins": 0.051746370270848274, + "rewards_train/rejected": -0.04183159023523331, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -101.11089324951172, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -160.24636840820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16108933091163635, + "rewards_train/margins": 0.16354751586914062, + "rewards_train/rejected": -0.324636846780777, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -72.86778259277344, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -118.22781372070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06322174519300461, + "rewards_train/margins": 0.18600311875343323, + "rewards_train/rejected": -0.12278137356042862, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.876059532165527, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -13.714221000671387, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.025105953216552734, + "rewards_train/margins": 0.002566147595643997, + "rewards_train/rejected": -0.02767210081219673, + "step": 192 + }, + { + "epoch": 0.05, + "logps_train/chosen": -149.56088256835938, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -195.23614501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5560882687568665, + "rewards_train/margins": 0.06752622127532959, + "rewards_train/rejected": -0.623614490032196, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -190.04119873046875, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -185.72634887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4041198790073395, + "rewards_train/margins": 0.16851499676704407, + "rewards_train/rejected": -0.5726348757743835, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -6.071712493896484, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.954244613647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014703750610351562, + "rewards_train/margins": 0.0570032112300396, + "rewards_train/rejected": -0.042299460619688034, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -3.1112101078033447, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -8.50613784790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023253990337252617, + "rewards_train/margins": 0.030117775313556194, + "rewards_train/rejected": -0.006863784976303577, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -125.56692504882812, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -181.69454956054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1566925048828125, + "rewards_train/margins": -0.18723754957318306, + "rewards_train/rejected": 0.03054504469037056, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -75.33590698242188, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -76.96405792236328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06640930473804474, + "rewards_train/margins": -0.03718490153551102, + "rewards_train/rejected": 0.10359420627355576, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -57.21464538574219, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -46.59211730957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22146454453468323, + "rewards_train/margins": -0.18725281208753586, + "rewards_train/rejected": -0.03421173244714737, + "step": 193 + }, + { + "epoch": 0.05, + "logps_train/chosen": -18.78097152709961, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -15.456610679626465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021902848035097122, + "rewards_train/margins": 0.03631391655653715, + "rewards_train/rejected": -0.01441106852144003, + "step": 193 + }, + { + "epoch": 0.05, + "learning_rate": 1.9958376442900188e-06, + "loss": 0.6682, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -174.1869354248047, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -149.27001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11869354546070099, + "rewards_train/margins": 0.10830840468406677, + "rewards_train/rejected": -0.22700195014476776, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -23.36691665649414, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -24.52625274658203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07419166713953018, + "rewards_train/margins": -0.009066388010978699, + "rewards_train/rejected": -0.06512527912855148, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -143.1991729736328, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -95.14930725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01991729810833931, + "rewards_train/margins": 0.0450134314596653, + "rewards_train/rejected": -0.06493072956800461, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.116833686828613, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -26.384906768798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014808368869125843, + "rewards_train/margins": 0.02368231024593115, + "rewards_train/rejected": -0.03849067911505699, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -74.14306640625, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -105.68626403808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014306641183793545, + "rewards_train/margins": 0.20431976858526468, + "rewards_train/rejected": -0.21862640976905823, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -83.28856658935547, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -129.8534393310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4211433529853821, + "rewards_train/margins": 1.0064873099327087, + "rewards_train/rejected": -0.5853439569473267, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -113.67539978027344, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -61.68199157714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.13246002793312073, + "rewards_train/margins": -0.02434082329273224, + "rewards_train/rejected": 0.15680085122585297, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -145.31475830078125, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -154.95127868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13147583603858948, + "rewards_train/margins": 0.5636520683765411, + "rewards_train/rejected": -0.6951279044151306, + "step": 194 + }, + { + "epoch": 0.05, + "logps_train/chosen": -98.22248077392578, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -96.39122772216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2722480893135071, + "rewards_train/margins": 0.0668746829032898, + "rewards_train/rejected": -0.3391227722167969, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -84.38357543945312, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -82.8815689086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03835754469037056, + "rewards_train/margins": 0.09979934617877007, + "rewards_train/rejected": -0.13815689086914062, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -142.1881103515625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -91.91233825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01881103590130806, + "rewards_train/margins": 0.12242279574275017, + "rewards_train/rejected": -0.14123383164405823, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -13.525333404541016, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -30.111717224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0037166594993323088, + "rewards_train/margins": 0.014888382283970714, + "rewards_train/rejected": -0.011171722784638405, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -5.852009296417236, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -3.259716749191284, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002299070358276367, + "rewards_train/margins": -0.0014167546760290861, + "rewards_train/rejected": 0.0037158250343054533, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -8.77648639678955, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -7.728668212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015148639678955078, + "rewards_train/margins": 0.007718181237578392, + "rewards_train/rejected": -0.02286682091653347, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -11.181224822998047, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -32.809593200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03187751770019531, + "rewards_train/margins": 0.06283683888614178, + "rewards_train/rejected": -0.030959321185946465, + "step": 195 + }, + { + "epoch": 0.05, + "logps_train/chosen": -13.94340991973877, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -7.283837795257568, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036909010261297226, + "rewards_train/margins": 0.11216779425740242, + "rewards_train/rejected": -0.0752587839961052, + "step": 195 + }, + { + "epoch": 0.05, + "learning_rate": 1.9955930312170656e-06, + "loss": 0.629, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -12.173458099365234, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -16.332740783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0076541900634765625, + "rewards_train/margins": 0.04092827066779137, + "rewards_train/rejected": -0.033274080604314804, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -122.7267837524414, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -120.91558837890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.47267839312553406, + "rewards_train/margins": -0.08111953735351562, + "rewards_train/rejected": -0.39155885577201843, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -6.145541667938232, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -6.575915336608887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19169583916664124, + "rewards_train/margins": 0.23991237208247185, + "rewards_train/rejected": -0.04821653291583061, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -12.899242401123047, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -31.749277114868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03507576137781143, + "rewards_train/margins": 0.1350034773349762, + "rewards_train/rejected": -0.09992771595716476, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -76.68770599365234, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -90.04344177246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018770599737763405, + "rewards_train/margins": -0.11442642100155354, + "rewards_train/rejected": 0.09565582126379013, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -117.77458190917969, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -125.78289794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07254181057214737, + "rewards_train/margins": 0.5008316114544868, + "rewards_train/rejected": -0.4282898008823395, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -37.02093505859375, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -40.786136627197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.052093505859375, + "rewards_train/margins": 0.10152016580104828, + "rewards_train/rejected": -0.15361367166042328, + "step": 196 + }, + { + "epoch": 0.05, + "logps_train/chosen": -52.12507629394531, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -93.90448760986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03749237209558487, + "rewards_train/margins": 0.12794113159179688, + "rewards_train/rejected": -0.090448759496212, + "step": 196 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.918882369995117, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -17.52588653564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02313823811709881, + "rewards_train/margins": 0.004450416192412376, + "rewards_train/rejected": -0.027588654309511185, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -94.76692199707031, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -170.41531372070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12330780178308487, + "rewards_train/margins": 0.06483917310833931, + "rewards_train/rejected": 0.05846862867474556, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -118.9000473022461, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -99.86274719238281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2400047332048416, + "rewards_train/margins": -0.053730010986328125, + "rewards_train/rejected": -0.1862747222185135, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -110.48690795898438, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -84.88704681396484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0486907958984375, + "rewards_train/margins": 0.19001388549804688, + "rewards_train/rejected": -0.23870468139648438, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -12.400303840637207, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -41.869205474853516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.016219615936279297, + "rewards_train/margins": -0.14685983955860138, + "rewards_train/rejected": 0.16307945549488068, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -101.38249206542969, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -125.16185760498047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06175079569220543, + "rewards_train/margins": -0.022063445299863815, + "rewards_train/rejected": 0.08381424099206924, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -128.59071350097656, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -158.3854522705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24092864990234375, + "rewards_train/margins": 0.179473876953125, + "rewards_train/rejected": 0.06145477294921875, + "step": 197 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.734615325927734, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -8.461630821228027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007788467686623335, + "rewards_train/margins": 0.05707655055448413, + "rewards_train/rejected": -0.049288082867860794, + "step": 197 + }, + { + "epoch": 0.06, + "learning_rate": 1.995341450048463e-06, + "loss": 0.6588, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.5717194080352783, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -10.584624290466309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02279694192111492, + "rewards_train/margins": 0.04191548563539982, + "rewards_train/rejected": -0.06471242755651474, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -85.05850219726562, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -71.50271606445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24414978921413422, + "rewards_train/margins": 0.5444213896989822, + "rewards_train/rejected": -0.300271600484848, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.0565667152404785, + "logps_train/ref_chosen": -1.7109375, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -5.5558695793151855, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03456292301416397, + "rewards_train/margins": 0.030399039387702942, + "rewards_train/rejected": -0.06496196240186691, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.8440626859664917, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -0.6015625, + "logps_train/rejected": -0.5689114332199097, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02190626971423626, + "rewards_train/margins": -0.02517137653194368, + "rewards_train/rejected": 0.0032651068177074194, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -87.34111022949219, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -111.52584838867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11588897556066513, + "rewards_train/margins": 0.7184738144278526, + "rewards_train/rejected": -0.6025848388671875, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.888019561767578, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -4.5810441970825195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.026301955804228783, + "rewards_train/margins": -0.030697536189109087, + "rewards_train/rejected": 0.004395580384880304, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -22.218067169189453, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -2.994302988052368, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15319328010082245, + "rewards_train/margins": 0.1885610781610012, + "rewards_train/rejected": -0.03536779806017876, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -78.7379150390625, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -115.97454833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2762084901332855, + "rewards_train/margins": 0.9736633598804474, + "rewards_train/rejected": -0.6974548697471619, + "step": 198 + }, + { + "epoch": 0.06, + "logps_train/chosen": -26.563020706176758, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -34.594627380371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01880207099020481, + "rewards_train/margins": -0.009339332580566406, + "rewards_train/rejected": -0.009462738409638405, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -19.2144775390625, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -32.26011657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04105224832892418, + "rewards_train/margins": 0.10456391051411629, + "rewards_train/rejected": -0.06351166218519211, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -96.49700927734375, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -126.04879760742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04970092698931694, + "rewards_train/margins": 0.7551788575947285, + "rewards_train/rejected": -0.8048797845840454, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -51.933441162109375, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -99.22196960449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04334411770105362, + "rewards_train/margins": 0.02885284274816513, + "rewards_train/rejected": -0.07219696044921875, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -18.46881866455078, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -50.576786041259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10311813652515411, + "rewards_train/margins": 0.060796741396188736, + "rewards_train/rejected": 0.04232139512896538, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -88.78450012207031, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -94.87747192382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07845001667737961, + "rewards_train/margins": 0.20929718762636185, + "rewards_train/rejected": -0.28774720430374146, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -6.246655464172363, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -6.9024152755737305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00033445359440520406, + "rewards_train/margins": 0.006200981151778251, + "rewards_train/rejected": -0.005866527557373047, + "step": 199 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.0796735286712646, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -1.375, + "logps_train/rejected": -1.9185746908187866, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.054532647132873535, + "rewards_train/margins": 0.1088901162147522, + "rewards_train/rejected": -0.05435746908187866, + "step": 199 + }, + { + "epoch": 0.06, + "learning_rate": 1.9950829025450113e-06, + "loss": 0.5959, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -67.30797576904297, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -85.63160705566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06920242309570312, + "rewards_train/margins": 0.3323631286621094, + "rewards_train/rejected": -0.26316070556640625, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.0926849842071533, + "logps_train/ref_chosen": -1.90625, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -66.03506469726562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018643498420715332, + "rewards_train/margins": -0.06513702869415283, + "rewards_train/rejected": 0.0464935302734375, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -21.760026931762695, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -8.127017974853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013502693735063076, + "rewards_train/margins": 0.042949103750288486, + "rewards_train/rejected": -0.05645179748535156, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -80.01908874511719, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -86.36636352539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0019088744884356856, + "rewards_train/margins": 0.23472748103085905, + "rewards_train/rejected": -0.23663635551929474, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -91.74613952636719, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -91.1009750366211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02461395226418972, + "rewards_train/margins": -0.014516448602080345, + "rewards_train/rejected": -0.010097503662109375, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.699344635009766, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -6.190719127655029, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09256553649902344, + "rewards_train/margins": 0.23351244628429413, + "rewards_train/rejected": -0.1409469097852707, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -160.33419799804688, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -165.89743041992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13341979682445526, + "rewards_train/margins": 0.5563232451677322, + "rewards_train/rejected": -0.6897430419921875, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.4254608154296875, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -1.96875, + "logps_train/rejected": -2.1063575744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013703919015824795, + "rewards_train/margins": 0.027464676648378372, + "rewards_train/rejected": -0.013760757632553577, + "step": 200 + }, + { + "epoch": 0.06, + "logps_train/chosen": -25.872333526611328, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -67.01573181152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11223335564136505, + "rewards_train/margins": -0.010660171508789062, + "rewards_train/rejected": -0.10157318413257599, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -117.08718872070312, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -184.44813537597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.291281133890152, + "rewards_train/margins": 0.7360946834087372, + "rewards_train/rejected": -0.4448135495185852, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -105.83595275878906, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -74.62509155273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18359528481960297, + "rewards_train/margins": 0.07891388237476349, + "rewards_train/rejected": -0.26250916719436646, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.005718231201172, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -8.084543228149414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04744682461023331, + "rewards_train/margins": -0.02024250105023384, + "rewards_train/rejected": -0.027204323559999466, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -111.26322174072266, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -80.8785400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32632216811180115, + "rewards_train/margins": 0.011531829833984375, + "rewards_train/rejected": -0.3378539979457855, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.690068244934082, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -10.892528533935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013538074679672718, + "rewards_train/margins": 0.006964779458940029, + "rewards_train/rejected": -0.020502854138612747, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.017459869384766, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -52.817508697509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017004013061523438, + "rewards_train/margins": 0.12375488132238388, + "rewards_train/rejected": -0.10675086826086044, + "step": 201 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.4606475830078125, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -13.72695541381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007060241885483265, + "rewards_train/margins": 0.09850578475743532, + "rewards_train/rejected": -0.09144554287195206, + "step": 201 + }, + { + "epoch": 0.06, + "learning_rate": 1.9948173905162697e-06, + "loss": 0.6269, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -111.29403686523438, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -109.47657775878906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17940369248390198, + "rewards_train/margins": -0.18174591660499573, + "rewards_train/rejected": 0.00234222412109375, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.9912614822387695, + "logps_train/ref_chosen": -1.984375, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -2.190145492553711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0006886482588015497, + "rewards_train/margins": -0.028549099748488516, + "rewards_train/rejected": 0.027860451489686966, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -12.188742637634277, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -9.733386039733887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.050124265253543854, + "rewards_train/margins": 0.004464339464902878, + "rewards_train/rejected": -0.05458860471844673, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -94.40824127197266, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -102.72782897949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.059175875037908554, + "rewards_train/margins": 0.13195877149701118, + "rewards_train/rejected": -0.07278289645910263, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -100.25293731689453, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -85.18653869628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17470626533031464, + "rewards_train/margins": 0.3933601379394531, + "rewards_train/rejected": -0.2186538726091385, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -32.18809509277344, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -89.20150756835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.143809512257576, + "rewards_train/margins": 0.07634124159812927, + "rewards_train/rejected": -0.22015075385570526, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -18.79733657836914, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -11.711335182189941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04526634141802788, + "rewards_train/margins": 0.11014986410737038, + "rewards_train/rejected": -0.0648835226893425, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.4272522926330566, + "logps_train/ref_chosen": -1.375, + "logps_train/ref_rejected": -1.90625, + "logps_train/rejected": -1.900046706199646, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0052252295427024364, + "rewards_train/margins": -0.005845558946020901, + "rewards_train/rejected": 0.0006203294033184648, + "step": 202 + }, + { + "epoch": 0.06, + "logps_train/chosen": -141.18751525878906, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -106.5600357055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08124847710132599, + "rewards_train/margins": 0.3872520476579666, + "rewards_train/rejected": -0.3060035705566406, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -71.52748107910156, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -108.95348358154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04725189134478569, + "rewards_train/margins": 0.39260024949908257, + "rewards_train/rejected": -0.3453483581542969, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -118.04548645019531, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -137.8057861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15454864501953125, + "rewards_train/margins": 0.22602996230125427, + "rewards_train/rejected": -0.3805786073207855, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.060503005981445, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -1.9765625, + "logps_train/rejected": -1.8780999183654785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006449699401855469, + "rewards_train/margins": -0.0033965585753321648, + "rewards_train/rejected": 0.009846257977187634, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -96.77882385253906, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -106.32275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07211761921644211, + "rewards_train/margins": 0.05439301021397114, + "rewards_train/rejected": 0.01772460900247097, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.316220283508301, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -50.171051025390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15505953133106232, + "rewards_train/margins": -0.16295442916452885, + "rewards_train/rejected": 0.00789489783346653, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.41486120223999, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -0.80859375, + "logps_train/rejected": -0.9041933417320251, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013361120596528053, + "rewards_train/margins": -0.0038011614233255386, + "rewards_train/rejected": -0.009559959173202515, + "step": 203 + }, + { + "epoch": 0.06, + "logps_train/chosen": -75.04148864746094, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -109.85968017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39585113525390625, + "rewards_train/margins": 0.8818191587924957, + "rewards_train/rejected": -0.4859680235385895, + "step": 203 + }, + { + "epoch": 0.06, + "learning_rate": 1.99454491582054e-06, + "loss": 0.6328, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -77.2115249633789, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -104.3404541015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.021152496337890625, + "rewards_train/margins": -0.037107085809111595, + "rewards_train/rejected": 0.01595458947122097, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -75.25437927246094, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -80.74188232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02456207387149334, + "rewards_train/margins": 0.04875030741095543, + "rewards_train/rejected": -0.02418823353946209, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -101.87879943847656, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -120.58464050292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2378799468278885, + "rewards_train/margins": 0.32058410346508026, + "rewards_train/rejected": -0.5584640502929688, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -98.14694213867188, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -107.68556213378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06469421833753586, + "rewards_train/margins": 0.0038619935512542725, + "rewards_train/rejected": -0.06855621188879013, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.583545446395874, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -3.1851861476898193, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004145455546677113, + "rewards_train/margins": 0.008601570501923561, + "rewards_train/rejected": -0.0044561149552464485, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.67105770111084, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -4.314464092254639, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01085577066987753, + "rewards_train/margins": 0.025278140790760517, + "rewards_train/rejected": -0.036133911460638046, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.8387632369995117, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -7.424936771392822, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.022373676300048828, + "rewards_train/margins": 0.09611735492944717, + "rewards_train/rejected": -0.07374367862939835, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -109.93565368652344, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -108.26374816894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09356536716222763, + "rewards_train/margins": 0.18280944973230362, + "rewards_train/rejected": -0.27637481689453125, + "step": 204 + }, + { + "epoch": 0.06, + "logps_train/chosen": -95.22431945800781, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -113.17091369628906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12243194878101349, + "rewards_train/margins": -0.05534058064222336, + "rewards_train/rejected": -0.06709136813879013, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -60.959312438964844, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -134.7982940673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.30406877398490906, + "rewards_train/margins": 0.9838981926441193, + "rewards_train/rejected": -0.6798294186592102, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -50.04655075073242, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -120.96215057373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04534492641687393, + "rewards_train/margins": 0.04155998374335468, + "rewards_train/rejected": 0.0037849426735192537, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -44.0938606262207, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -37.10865020751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06561394035816193, + "rewards_train/margins": 0.15147896111011505, + "rewards_train/rejected": -0.08586502075195312, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -114.34298706054688, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -181.22198486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06570129841566086, + "rewards_train/margins": 0.9878997728228569, + "rewards_train/rejected": -0.922198474407196, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -27.080814361572266, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -18.589706420898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.020581437274813652, + "rewards_train/margins": -0.06161079742014408, + "rewards_train/rejected": 0.04102936014533043, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -62.40867614746094, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -26.180530548095703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21586762368679047, + "rewards_train/margins": -0.1478145644068718, + "rewards_train/rejected": -0.06805305927991867, + "step": 205 + }, + { + "epoch": 0.06, + "logps_train/chosen": -87.52043151855469, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -78.66974639892578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04795685037970543, + "rewards_train/margins": -0.03506850823760033, + "rewards_train/rejected": 0.08302535861730576, + "step": 205 + }, + { + "epoch": 0.06, + "learning_rate": 1.994265480364857e-06, + "loss": 0.6308, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -128.23867797851562, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -112.60653686523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22386780381202698, + "rewards_train/margins": 0.28678587079048157, + "rewards_train/rejected": -0.5106536746025085, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.6918877363204956, + "logps_train/ref_chosen": -1.640625, + "logps_train/ref_rejected": -2.65625, + "logps_train/rejected": -3.170846462249756, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005126273725181818, + "rewards_train/margins": 0.04633337398990989, + "rewards_train/rejected": -0.051459647715091705, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.4377689361572266, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -3.1875, + "logps_train/rejected": -3.4818334579467773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0390356071293354, + "rewards_train/margins": 0.06846895255148411, + "rewards_train/rejected": -0.029433345422148705, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -50.49231719970703, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -56.87333297729492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0007682800642214715, + "rewards_train/margins": -0.03689842444146052, + "rewards_train/rejected": 0.03766670450568199, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.0747814178466797, + "logps_train/ref_chosen": -1.9296875, + "logps_train/ref_rejected": -1.0859375, + "logps_train/rejected": -1.1220629215240479, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.014509391970932484, + "rewards_train/margins": -0.010896849678829312, + "rewards_train/rejected": -0.0036125422921031713, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -93.78004455566406, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -120.37934875488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5280044674873352, + "rewards_train/margins": 0.15993040800094604, + "rewards_train/rejected": -0.6879348754882812, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -151.35848999023438, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -144.96946716308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6358489990234375, + "rewards_train/margins": -0.2389022707939148, + "rewards_train/rejected": -0.3969467282295227, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.360210418701172, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -8.301651000976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05147895961999893, + "rewards_train/margins": -0.005855940282344818, + "rewards_train/rejected": 0.05733489990234375, + "step": 206 + }, + { + "epoch": 0.06, + "logps_train/chosen": -78.11405181884766, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -264.0, + "logps_train/rejected": -266.0540771484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2114051878452301, + "rewards_train/margins": -0.005997464060783386, + "rewards_train/rejected": -0.20540772378444672, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -98.11225891113281, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -103.0228500366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11122589558362961, + "rewards_train/margins": 0.7410591319203377, + "rewards_train/rejected": -0.8522850275039673, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -64.08699035644531, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -87.03797149658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1413009613752365, + "rewards_train/margins": 0.24509811401367188, + "rewards_train/rejected": -0.10379715263843536, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -68.94158935546875, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -76.20399475097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05584106594324112, + "rewards_train/margins": 0.1262405440211296, + "rewards_train/rejected": -0.07039947807788849, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -185.93563842773438, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -185.57571411132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.393563836812973, + "rewards_train/margins": -0.03599241375923157, + "rewards_train/rejected": -0.35757142305374146, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -116.087890625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -127.80286407470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14121094346046448, + "rewards_train/margins": 0.22149734944105148, + "rewards_train/rejected": -0.080286405980587, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -78.56495666503906, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -78.32827758789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.19350433349609375, + "rewards_train/margins": -0.023667916655540466, + "rewards_train/rejected": 0.21717225015163422, + "step": 207 + }, + { + "epoch": 0.06, + "logps_train/chosen": -165.153564453125, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -122.67379760742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41535645723342896, + "rewards_train/margins": -0.14797669649124146, + "rewards_train/rejected": -0.2673797607421875, + "step": 207 + }, + { + "epoch": 0.06, + "learning_rate": 1.993979086104973e-06, + "loss": 0.6564, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.315913200378418, + "logps_train/ref_chosen": -1.234375, + "logps_train/ref_rejected": -1.9921875, + "logps_train/rejected": -2.196444034576416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008153820410370827, + "rewards_train/margins": 0.012271832674741745, + "rewards_train/rejected": -0.02042565308511257, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -205.4471435546875, + "logps_train/ref_chosen": -204.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -189.53225708007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14471435546875, + "rewards_train/margins": -0.09148864820599556, + "rewards_train/rejected": -0.05322570726275444, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -213.75119018554688, + "logps_train/ref_chosen": -207.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -123.30259704589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6751190423965454, + "rewards_train/margins": -0.1948593258857727, + "rewards_train/rejected": -0.4802597165107727, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -63.60350799560547, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -93.9172592163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28964921832084656, + "rewards_train/margins": 0.331375140696764, + "rewards_train/rejected": -0.041725922375917435, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -10.654787063598633, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -13.760644912719727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015478706918656826, + "rewards_train/margins": 0.0105857839807868, + "rewards_train/rejected": -0.026064490899443626, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -13.024621963500977, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -9.45893669128418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.027462197467684746, + "rewards_train/margins": 0.005931472405791283, + "rewards_train/rejected": -0.03339366987347603, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.595985412597656, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -10.889419555664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002901458879932761, + "rewards_train/margins": 0.02934341481886804, + "rewards_train/rejected": -0.02644195593893528, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.5938849449157715, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -3.265625, + "logps_train/rejected": -3.814175605773926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010950994677841663, + "rewards_train/margins": 0.043904065154492855, + "rewards_train/rejected": -0.05485505983233452, + "step": 208 + }, + { + "epoch": 0.06, + "logps_train/chosen": -37.80304718017578, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -14.66574764251709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.119695283472538, + "rewards_train/margins": 0.21752005070447922, + "rewards_train/rejected": -0.09782476723194122, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -81.71730041503906, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -99.56352233886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07173004001379013, + "rewards_train/margins": 0.2846221998333931, + "rewards_train/rejected": -0.3563522398471832, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -188.76315307617188, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -130.1867218017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47631531953811646, + "rewards_train/margins": 0.24235689640045166, + "rewards_train/rejected": -0.7186722159385681, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -87.9315185546875, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -127.37433624267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006848144810646772, + "rewards_train/margins": 0.6942818048410118, + "rewards_train/rejected": -0.687433660030365, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -103.0951919555664, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.57926940917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14048080146312714, + "rewards_train/margins": 0.19840774312615395, + "rewards_train/rejected": -0.05792694166302681, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -103.58694458007812, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -108.79188537597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008694457821547985, + "rewards_train/margins": 0.22049408871680498, + "rewards_train/rejected": -0.22918854653835297, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.525547981262207, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -21.48407745361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009945201687514782, + "rewards_train/margins": 0.08335294853895903, + "rewards_train/rejected": -0.07340774685144424, + "step": 209 + }, + { + "epoch": 0.06, + "logps_train/chosen": -128.10440063476562, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -128.45021057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0604400634765625, + "rewards_train/margins": 0.3845809996128082, + "rewards_train/rejected": -0.4450210630893707, + "step": 209 + }, + { + "epoch": 0.06, + "learning_rate": 1.9936857350453426e-06, + "loss": 0.6243, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -42.78136444091797, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -215.0, + "logps_train/rejected": -201.0240020751953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.028136445209383965, + "rewards_train/margins": -1.4257362615317106, + "rewards_train/rejected": 1.3975998163223267, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -167.13450622558594, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -157.757568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.213450625538826, + "rewards_train/margins": 0.6623062342405319, + "rewards_train/rejected": -0.8757568597793579, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -85.01069641113281, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -85.48503112792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19893036782741547, + "rewards_train/margins": 0.047433480620384216, + "rewards_train/rejected": 0.15149688720703125, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.275291442871094, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -3.3949947357177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013095855712890625, + "rewards_train/margins": 0.010407829191535711, + "rewards_train/rejected": 0.0026880265213549137, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -123.66218566894531, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -136.58615112304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.166218563914299, + "rewards_train/margins": -0.007603451609611511, + "rewards_train/rejected": -0.1586151123046875, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.6728160977363586, + "logps_train/ref_chosen": -0.65234375, + "logps_train/ref_rejected": -0.65234375, + "logps_train/rejected": -0.6430935859680176, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0020472349133342505, + "rewards_train/margins": -0.002972251328174025, + "rewards_train/rejected": 0.0009250164148397744, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -110.11930847167969, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -32.81303787231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01193084754049778, + "rewards_train/margins": 0.13187294267117977, + "rewards_train/rejected": -0.14380379021167755, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.467573165893555, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -10.877338409423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03449268266558647, + "rewards_train/margins": 0.0347265236050589, + "rewards_train/rejected": -0.00023384093947242945, + "step": 210 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.9679437875747681, + "logps_train/ref_chosen": -0.98046875, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -1.8863496780395508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0012524962658062577, + "rewards_train/margins": -0.013237536302767694, + "rewards_train/rejected": 0.014490032568573952, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -21.34925651550293, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -35.5155143737793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09742565453052521, + "rewards_train/margins": 0.004125781357288361, + "rewards_train/rejected": -0.10155143588781357, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -131.6832275390625, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -90.7615966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13167725503444672, + "rewards_train/margins": 0.05783692002296448, + "rewards_train/rejected": 0.07384033501148224, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -16.85977554321289, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -4.58443021774292, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026522446423768997, + "rewards_train/margins": 0.06934046745300293, + "rewards_train/rejected": -0.04281802102923393, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -21.258255004882812, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -13.228686332702637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0008255005232058465, + "rewards_train/margins": 0.028293132374528795, + "rewards_train/rejected": -0.029118632897734642, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -62.11730194091797, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -16.367443084716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038269806653261185, + "rewards_train/margins": 0.23751411214470863, + "rewards_train/rejected": -0.19924430549144745, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.403512001037598, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -3.876809597015381, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01277379970997572, + "rewards_train/margins": -0.015170241706073284, + "rewards_train/rejected": 0.027944041416049004, + "step": 211 + }, + { + "epoch": 0.06, + "logps_train/chosen": -193.9029541015625, + "logps_train/ref_chosen": -195.0, + "logps_train/ref_rejected": -241.0, + "logps_train/rejected": -242.0195770263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10970459133386612, + "rewards_train/margins": 0.21166229248046875, + "rewards_train/rejected": -0.10195770114660263, + "step": 211 + }, + { + "epoch": 0.06, + "learning_rate": 1.9933854292391134e-06, + "loss": 0.7094, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -34.374755859375, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -24.116193771362305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012524413876235485, + "rewards_train/margins": 0.07414379250258207, + "rewards_train/rejected": -0.06161937862634659, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.662627220153809, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -11.214742660522461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01938772201538086, + "rewards_train/margins": 0.05208654701709747, + "rewards_train/rejected": -0.07147426903247833, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -107.91659545898438, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -19.708946228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05834045633673668, + "rewards_train/margins": 0.20423508808016777, + "rewards_train/rejected": -0.1458946317434311, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -23.18791961669922, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -19.311182022094727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0062080384232103825, + "rewards_train/margins": 0.0373262413777411, + "rewards_train/rejected": -0.031118202954530716, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -111.96300506591797, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -5.036975860595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5536994934082031, + "rewards_train/margins": 0.8042720854282379, + "rewards_train/rejected": -0.2505725920200348, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -10.143250465393066, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -9.087087631225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0393250472843647, + "rewards_train/margins": 0.0006337165832519531, + "rewards_train/rejected": -0.03995876386761665, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.1925764083862305, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -13.480724334716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.011992359533905983, + "rewards_train/margins": -0.06493520550429821, + "rewards_train/rejected": 0.0769275650382042, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -82.64006042480469, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -100.22909545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06400604546070099, + "rewards_train/margins": 0.35890351235866547, + "rewards_train/rejected": -0.42290955781936646, + "step": 212 + }, + { + "epoch": 0.06, + "logps_train/chosen": -145.42544555664062, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -150.52822875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5425445437431335, + "rewards_train/margins": 0.21027833223342896, + "rewards_train/rejected": -0.7528228759765625, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -99.18069458007812, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -140.7965545654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08193054050207138, + "rewards_train/margins": 0.6615860089659691, + "rewards_train/rejected": -0.5796554684638977, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -108.99130249023438, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -144.2857666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0008697509765625, + "rewards_train/margins": 0.329446405172348, + "rewards_train/rejected": -0.3285766541957855, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.4297709465026855, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -0.92578125, + "logps_train/rejected": -0.9822525382041931, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.041397906839847565, + "rewards_train/margins": 0.047045035753399134, + "rewards_train/rejected": -0.005647128913551569, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -14.354360580444336, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -10.320493698120117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.054186057299375534, + "rewards_train/margins": -0.034636687487363815, + "rewards_train/rejected": -0.01954936981201172, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.698411226272583, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -12.022383689880371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007341122720390558, + "rewards_train/margins": 0.03239724552258849, + "rewards_train/rejected": -0.03973836824297905, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -71.20451354980469, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -76.12316131591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07045135647058487, + "rewards_train/margins": -0.058135224506258965, + "rewards_train/rejected": -0.012316131964325905, + "step": 213 + }, + { + "epoch": 0.06, + "logps_train/chosen": -80.78689575195312, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -110.00524139404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1713104248046875, + "rewards_train/margins": 0.8218345642089844, + "rewards_train/rejected": -0.6505241394042969, + "step": 213 + }, + { + "epoch": 0.06, + "learning_rate": 1.9930781707881077e-06, + "loss": 0.6012, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -22.627952575683594, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -35.147666931152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11279525607824326, + "rewards_train/margins": -0.04802855849266052, + "rewards_train/rejected": -0.06476669758558273, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -68.92022705078125, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -59.51502227783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.207977294921875, + "rewards_train/margins": 0.3844795227050781, + "rewards_train/rejected": -0.17650222778320312, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -43.48344039916992, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -28.93569564819336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09834404289722443, + "rewards_train/margins": -0.11727447807788849, + "rewards_train/rejected": 0.018930435180664062, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -38.72262954711914, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -27.66790199279785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04726295545697212, + "rewards_train/margins": 0.16952724382281303, + "rewards_train/rejected": -0.21679019927978516, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -17.8125, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -15.808292388916016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04374999925494194, + "rewards_train/margins": -0.05667076073586941, + "rewards_train/rejected": 0.012920761480927467, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.485745429992676, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -81.1414566040039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12982454895973206, + "rewards_train/margins": -0.2156788930296898, + "rewards_train/rejected": 0.08585434406995773, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -63.42860412597656, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -17.50503158569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04286041483283043, + "rewards_train/margins": 0.05764274671673775, + "rewards_train/rejected": -0.10050316154956818, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -28.297590255737305, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -58.2857551574707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06725902855396271, + "rewards_train/margins": 0.2363164871931076, + "rewards_train/rejected": -0.3035755157470703, + "step": 214 + }, + { + "epoch": 0.06, + "logps_train/chosen": -19.401752471923828, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -40.93450164794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1098247542977333, + "rewards_train/margins": 0.05327491834759712, + "rewards_train/rejected": 0.056549835950136185, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -12.297934532165527, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -21.089069366455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020206546410918236, + "rewards_train/margins": 0.05411348305642605, + "rewards_train/rejected": -0.03390693664550781, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.3871684074401855, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -6.3727569580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057466842234134674, + "rewards_train/margins": 0.026683852076530457, + "rewards_train/rejected": -0.08415069431066513, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.6759514808654785, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -4.485177993774414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01603264920413494, + "rewards_train/margins": -0.005014849826693535, + "rewards_train/rejected": -0.011017799377441406, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.6209053993225098, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -3.0122814178466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0035344602074474096, + "rewards_train/margins": 0.015700102550908923, + "rewards_train/rejected": -0.012165642343461514, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -17.46645164489746, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -12.114834785461426, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03414516523480415, + "rewards_train/margins": 0.07108831778168678, + "rewards_train/rejected": -0.10523348301649094, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -120.80748748779297, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -164.33078002929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.030748749151825905, + "rewards_train/margins": -0.09767075069248676, + "rewards_train/rejected": 0.06692200154066086, + "step": 215 + }, + { + "epoch": 0.06, + "logps_train/chosen": -83.76335144042969, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -105.88352966308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12366485595703125, + "rewards_train/margins": 0.36201782524585724, + "rewards_train/rejected": -0.238352969288826, + "step": 215 + }, + { + "epoch": 0.06, + "learning_rate": 1.992763961842808e-06, + "loss": 0.6695, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -79.69441986083984, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -16.28622817993164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3194420039653778, + "rewards_train/margins": -0.2283191829919815, + "rewards_train/rejected": -0.0911228209733963, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -128.93641662597656, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -115.05934143066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006358337588608265, + "rewards_train/margins": 0.012292480561882257, + "rewards_train/rejected": -0.0059341429732739925, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.808271646499634, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -8.67282485961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02229783497750759, + "rewards_train/margins": 0.10833032242953777, + "rewards_train/rejected": -0.08603248745203018, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.406296253204346, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -5.81659460067749, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01562962494790554, + "rewards_train/margins": 0.03477983735501766, + "rewards_train/rejected": -0.0504094623029232, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -125.16075134277344, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -179.37582397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08392486721277237, + "rewards_train/margins": 0.4215072765946388, + "rewards_train/rejected": -0.33758240938186646, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -137.98475646972656, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -80.4786376953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.39847564697265625, + "rewards_train/margins": -0.05061188340187073, + "rewards_train/rejected": -0.3478637635707855, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -25.908004760742188, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -18.8544864654541, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02169952355325222, + "rewards_train/margins": -0.042851829901337624, + "rewards_train/rejected": 0.06455135345458984, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -55.44429016113281, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -74.96859741210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15557098388671875, + "rewards_train/margins": 0.5024307370185852, + "rewards_train/rejected": -0.34685975313186646, + "step": 216 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.417878150939941, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -4.309731960296631, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03553781658411026, + "rewards_train/margins": -0.013939620926976204, + "rewards_train/rejected": -0.021598195657134056, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -125.59915924072266, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -143.34503173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1900840848684311, + "rewards_train/margins": 0.5245872586965561, + "rewards_train/rejected": -0.334503173828125, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -90.42190551757812, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -129.33810424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10780944675207138, + "rewards_train/margins": 0.5916198715567589, + "rewards_train/rejected": -0.4838104248046875, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -160.29904174804688, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -7.439401626586914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2299041748046875, + "rewards_train/margins": -0.14846400916576385, + "rewards_train/rejected": -0.08144016563892365, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -6.286559104919434, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -12.445002555847168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002594089601188898, + "rewards_train/margins": 0.040844345930963755, + "rewards_train/rejected": -0.03825025632977486, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -151.10311889648438, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -171.255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010311889462172985, + "rewards_train/margins": 0.515274059958756, + "rewards_train/rejected": -0.525585949420929, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -95.69364929199219, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -23.575254440307617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03063507191836834, + "rewards_train/margins": 0.13816051743924618, + "rewards_train/rejected": -0.10752544552087784, + "step": 217 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.459222793579102, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -6.7726006507873535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.033422280102968216, + "rewards_train/margins": 0.034462783485651016, + "rewards_train/rejected": -0.06788506358861923, + "step": 217 + }, + { + "epoch": 0.06, + "learning_rate": 1.9924428046023446e-06, + "loss": 0.6297, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.2345334142446518, + "logps_train/ref_chosen": -0.279296875, + "logps_train/ref_rejected": -0.279296875, + "logps_train/rejected": -0.22844548523426056, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0044763460755348206, + "rewards_train/margins": -0.000608792994171381, + "rewards_train/rejected": 0.0050851390697062016, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.785242080688477, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -9.475021362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.034774210304021835, + "rewards_train/margins": 0.17835292592644691, + "rewards_train/rejected": -0.21312713623046875, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -127.99591827392578, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -177.22854614257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2004081755876541, + "rewards_train/margins": 0.4232627898454666, + "rewards_train/rejected": -0.2228546142578125, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -136.44158935546875, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -136.700927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15584106743335724, + "rewards_train/margins": 0.025933831930160522, + "rewards_train/rejected": 0.12990723550319672, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -61.81480026245117, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -153.8356475830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1814800351858139, + "rewards_train/margins": 1.3020846992731094, + "rewards_train/rejected": -1.4835647344589233, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.609808921813965, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -11.762067794799805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029730891808867455, + "rewards_train/margins": 0.021475886926054955, + "rewards_train/rejected": -0.05120677873492241, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.44039249420166, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -13.91374397277832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005960750859230757, + "rewards_train/margins": 0.11608515260741115, + "rewards_train/rejected": -0.11012440174818039, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.6636974811553955, + "logps_train/ref_chosen": -2.5625, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -2.9087278842926025, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010119748301804066, + "rewards_train/margins": 0.005753041245043278, + "rewards_train/rejected": -0.015872789546847343, + "step": 218 + }, + { + "epoch": 0.06, + "logps_train/chosen": -115.12061309814453, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -122.57420349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26206132769584656, + "rewards_train/margins": 0.39535900950431824, + "rewards_train/rejected": -0.6574203372001648, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.205678939819336, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -21.861318588256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05443210527300835, + "rewards_train/margins": 0.2655639611184597, + "rewards_train/rejected": -0.21113185584545135, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -76.19241333007812, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -153.47518920898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16924133896827698, + "rewards_train/margins": 0.4782775938510895, + "rewards_train/rejected": -0.6475189328193665, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -101.32414245605469, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -139.33450317382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08241424709558487, + "rewards_train/margins": 0.6510361060500145, + "rewards_train/rejected": -0.7334503531455994, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -91.08992004394531, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -89.432861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14100800454616547, + "rewards_train/margins": 0.5842941552400589, + "rewards_train/rejected": -0.44328615069389343, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -110.90107727050781, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -110.56698608398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29010772705078125, + "rewards_train/margins": 0.016590893268585205, + "rewards_train/rejected": -0.30669862031936646, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -51.07066345214844, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -51.19368362426758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04293365404009819, + "rewards_train/margins": 0.012302016839385033, + "rewards_train/rejected": 0.030631637200713158, + "step": 219 + }, + { + "epoch": 0.06, + "logps_train/chosen": -147.8445587158203, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -103.45420837402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18445587158203125, + "rewards_train/margins": 0.1609649658203125, + "rewards_train/rejected": -0.34542083740234375, + "step": 219 + }, + { + "epoch": 0.06, + "learning_rate": 1.9921147013144777e-06, + "loss": 0.572, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -84.956787109375, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -97.1129150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3043212890625, + "rewards_train/margins": 0.7656128108501434, + "rewards_train/rejected": -0.46129152178764343, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -79.82583618164062, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -92.90424346923828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.16741637885570526, + "rewards_train/margins": -0.14215926826000214, + "rewards_train/rejected": 0.3095756471157074, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -112.811767578125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -205.03103637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38117676973342896, + "rewards_train/margins": 1.0219269394874573, + "rewards_train/rejected": -1.4031037092208862, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -53.83796691894531, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -64.48383331298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01620330847799778, + "rewards_train/margins": 0.014586639706976712, + "rewards_train/rejected": 0.001616668771021068, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -124.29878234863281, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -107.05436706542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.229878231883049, + "rewards_train/margins": 0.025558486580848694, + "rewards_train/rejected": -0.2554367184638977, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -15.265754699707031, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -33.72761535644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01092453021556139, + "rewards_train/margins": 0.18368607480078936, + "rewards_train/rejected": -0.17276154458522797, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -117.98446655273438, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -145.54629516601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.351553350687027, + "rewards_train/margins": 0.9061828553676605, + "rewards_train/rejected": -0.5546295046806335, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -41.93058776855469, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -21.608726501464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06805878132581711, + "rewards_train/margins": -0.03218613192439079, + "rewards_train/rejected": -0.035872649401426315, + "step": 220 + }, + { + "epoch": 0.06, + "logps_train/chosen": -23.96622085571289, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -5.1700520515441895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04087791591882706, + "rewards_train/margins": 0.08288312330842018, + "rewards_train/rejected": -0.042005207389593124, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -32.45948028564453, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -6.070237159729004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1790519803762436, + "rewards_train/margins": 0.3079507052898407, + "rewards_train/rejected": -0.1288987249135971, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.99144172668457, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -7.603610038757324, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02101917378604412, + "rewards_train/margins": 0.0799668300896883, + "rewards_train/rejected": -0.10098600387573242, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.961946964263916, + "logps_train/ref_chosen": -0.271484375, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -7.5416975021362305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0690462589263916, + "rewards_train/margins": 0.15699850022792816, + "rewards_train/rejected": -0.22604475915431976, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -9.821889877319336, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -27.785890579223633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.057188987731933594, + "rewards_train/margins": -0.09109992906451225, + "rewards_train/rejected": 0.03391094133257866, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -175.67979431152344, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -160.4432373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03202056884765625, + "rewards_train/margins": 0.9763442873954773, + "rewards_train/rejected": -0.944323718547821, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -65.32115173339844, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -71.97325897216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01788482628762722, + "rewards_train/margins": 0.21521072648465633, + "rewards_train/rejected": -0.1973259001970291, + "step": 221 + }, + { + "epoch": 0.06, + "logps_train/chosen": -7.169236660003662, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -5.789816379547119, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0018263340461999178, + "rewards_train/margins": 0.024557971628382802, + "rewards_train/rejected": -0.022731637582182884, + "step": 221 + }, + { + "epoch": 0.06, + "learning_rate": 1.991779654275582e-06, + "loss": 0.5801, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -126.91763305664062, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -142.1561279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.441763311624527, + "rewards_train/margins": 0.373849481344223, + "rewards_train/rejected": -0.81561279296875, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -18.18670082092285, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -6.693093299865723, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043829917907714844, + "rewards_train/margins": 0.08501425012946129, + "rewards_train/rejected": -0.041184332221746445, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.842601776123047, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -7.82789945602417, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018864823505282402, + "rewards_train/margins": 0.1360297705978155, + "rewards_train/rejected": -0.11716494709253311, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -164.0131378173828, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -178.90817260742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6013137698173523, + "rewards_train/margins": -0.01049649715423584, + "rewards_train/rejected": -0.5908172726631165, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.581984519958496, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -4.863551616668701, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01680154912173748, + "rewards_train/margins": 0.02190671069547534, + "rewards_train/rejected": -0.00510516157373786, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -124.09368133544922, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -174.0315399169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7593681216239929, + "rewards_train/margins": 0.14378589391708374, + "rewards_train/rejected": -0.9031540155410767, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -46.292110443115234, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -45.75, + "logps_train/rejected": -44.867279052734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0457889549434185, + "rewards_train/margins": -0.042483139783144, + "rewards_train/rejected": 0.0882720947265625, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -120.34661102294922, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -109.89396667480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5346611142158508, + "rewards_train/margins": 0.4047355651855469, + "rewards_train/rejected": -0.9393966794013977, + "step": 222 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.172060966491699, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -4.988965034484863, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007793903350830078, + "rewards_train/margins": -0.021434593945741653, + "rewards_train/rejected": 0.02922849729657173, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.899274826049805, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -1.8125, + "logps_train/rejected": -5.517366409301758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08992748707532883, + "rewards_train/margins": 0.28055915981531143, + "rewards_train/rejected": -0.37048664689064026, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -6.767416477203369, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -7.689852237701416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017008353024721146, + "rewards_train/margins": 0.03286857716739178, + "rewards_train/rejected": -0.01586022414267063, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -172.81605529785156, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -212.0, + "logps_train/rejected": -215.636474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1816055327653885, + "rewards_train/margins": 0.1820419281721115, + "rewards_train/rejected": -0.3636474609375, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -49.58925247192383, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -17.23027229309082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11607475578784943, + "rewards_train/margins": 0.21410198509693146, + "rewards_train/rejected": -0.09802722930908203, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -100.36875915527344, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -125.18434143066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5868759155273438, + "rewards_train/margins": 0.4815582036972046, + "rewards_train/rejected": -1.0684341192245483, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.049243450164795, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -6.693899154663086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.004450655076652765, + "rewards_train/margins": -0.010534429457038641, + "rewards_train/rejected": 0.014985084533691406, + "step": 223 + }, + { + "epoch": 0.06, + "logps_train/chosen": -60.666893005371094, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -108.83301544189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.058310698717832565, + "rewards_train/margins": 0.1416122429072857, + "rewards_train/rejected": -0.08330154418945312, + "step": 223 + }, + { + "epoch": 0.06, + "learning_rate": 1.9914376658306317e-06, + "loss": 0.6236, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -95.28691864013672, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -107.58695983886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.028691863641142845, + "rewards_train/margins": 0.4300041142851114, + "rewards_train/rejected": -0.4586959779262543, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -93.02337646484375, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -93.36964416503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0023376464378088713, + "rewards_train/margins": 0.03462677006609738, + "rewards_train/rejected": -0.03696441650390625, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -20.31987190246582, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -43.47785568237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01801281049847603, + "rewards_train/margins": 0.06579837948083878, + "rewards_train/rejected": -0.04778556898236275, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -82.68074035644531, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -99.43380737304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08192596584558487, + "rewards_train/margins": -0.02469329535961151, + "rewards_train/rejected": 0.10661926120519638, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -48.25706481933594, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -80.91993713378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14929352700710297, + "rewards_train/margins": 0.04128723591566086, + "rewards_train/rejected": 0.10800629109144211, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -92.42448425292969, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -113.74592590332031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4924484193325043, + "rewards_train/margins": -0.5678558275103569, + "rewards_train/rejected": 0.07540740817785263, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -78.44159698486328, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -126.42578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005840301513671875, + "rewards_train/margins": 1.0984184741973877, + "rewards_train/rejected": -1.0925781726837158, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.9342055320739746, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -21.836517333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02310805395245552, + "rewards_train/margins": 0.06054368242621422, + "rewards_train/rejected": -0.08365173637866974, + "step": 224 + }, + { + "epoch": 0.06, + "logps_train/chosen": -192.8300323486328, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -187.25537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5830032229423523, + "rewards_train/margins": 0.24253392219543457, + "rewards_train/rejected": -0.8255371451377869, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -81.14998626708984, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -104.3709716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36499863862991333, + "rewards_train/margins": 0.6720985770225525, + "rewards_train/rejected": -1.0370972156524658, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.052215576171875, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -1.1875, + "logps_train/rejected": -1.459559679031372, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013528442941606045, + "rewards_train/margins": 0.04073441121727228, + "rewards_train/rejected": -0.027205968275666237, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.249112129211426, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -4.89700984954834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015536213293671608, + "rewards_train/margins": 0.008539771661162376, + "rewards_train/rejected": -0.024075984954833984, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -67.40216827392578, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -120.85236358642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14021682739257812, + "rewards_train/margins": 0.9450196027755737, + "rewards_train/rejected": -1.0852364301681519, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -6.9525933265686035, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -8.477104187011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0077593326568603516, + "rewards_train/margins": 0.027451086789369583, + "rewards_train/rejected": -0.035210419446229935, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.314645290374756, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -8.427796363830566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015839530155062675, + "rewards_train/margins": 0.08006511069834232, + "rewards_train/rejected": -0.095904640853405, + "step": 225 + }, + { + "epoch": 0.06, + "logps_train/chosen": -79.93700408935547, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -100.90766906738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25629958510398865, + "rewards_train/margins": 0.24706649128347635, + "rewards_train/rejected": 0.009233093820512295, + "step": 225 + }, + { + "epoch": 0.06, + "learning_rate": 1.9910887383731836e-06, + "loss": 0.6109, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.474682807922363, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -23.032909393310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01815672032535076, + "rewards_train/margins": -0.05355233885347843, + "rewards_train/rejected": 0.0717090591788292, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -109.66304779052734, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -93.01513671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6663047671318054, + "rewards_train/margins": -0.01479107141494751, + "rewards_train/rejected": -0.6515136957168579, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.281991720199585, + "logps_train/ref_chosen": -1.4375, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -2.922363758087158, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015550828538835049, + "rewards_train/margins": 0.0405997047200799, + "rewards_train/rejected": -0.02504887618124485, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -2.2954907417297363, + "logps_train/ref_chosen": -0.36328125, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -6.541177272796631, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19322095811367035, + "rewards_train/margins": 0.026521772146224976, + "rewards_train/rejected": -0.21974273025989532, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -65.17916107177734, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -50.04009246826172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3179161250591278, + "rewards_train/margins": -0.2889068778604269, + "rewards_train/rejected": -0.029009247198700905, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -18.137611389160156, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -26.93294906616211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036238860338926315, + "rewards_train/margins": 0.19203376397490501, + "rewards_train/rejected": -0.1557949036359787, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -69.16930389404297, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -107.56804656982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1830696165561676, + "rewards_train/margins": 1.1398743093013763, + "rewards_train/rejected": -0.9568046927452087, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.5735398530960083, + "logps_train/ref_chosen": -1.734375, + "logps_train/ref_rejected": -0.61328125, + "logps_train/rejected": -0.5712550282478333, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01608351431787014, + "rewards_train/margins": 0.011880891863256693, + "rewards_train/rejected": 0.004202622454613447, + "step": 226 + }, + { + "epoch": 0.06, + "logps_train/chosen": -210.02450561523438, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -152.39976501464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3024505376815796, + "rewards_train/margins": -0.26247406005859375, + "rewards_train/rejected": -1.0399764776229858, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -11.134748458862305, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -28.494239807128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01972484588623047, + "rewards_train/margins": -0.12030086666345596, + "rewards_train/rejected": 0.1005760207772255, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.6747133731842041, + "logps_train/ref_chosen": -0.6171875, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -7.159915924072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005752587225288153, + "rewards_train/margins": 0.0008640051819384098, + "rewards_train/rejected": -0.0066165924072265625, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -17.834415435791016, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -28.501895904541016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.020941544324159622, + "rewards_train/margins": -0.020751953867147677, + "rewards_train/rejected": -0.00018959045701194555, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -94.09666442871094, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -94.66436767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2903335690498352, + "rewards_train/margins": 0.7067703306674957, + "rewards_train/rejected": -0.4164367616176605, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -145.8778839111328, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -170.09634399414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11221160739660263, + "rewards_train/margins": 0.7218460068106651, + "rewards_train/rejected": -0.6096343994140625, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -117.88270568847656, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -133.10032653808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5382705926895142, + "rewards_train/margins": 0.4717620611190796, + "rewards_train/rejected": -1.0100326538085938, + "step": 227 + }, + { + "epoch": 0.06, + "logps_train/chosen": -116.19764709472656, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -127.75334167480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01976471021771431, + "rewards_train/margins": -0.3444305546581745, + "rewards_train/rejected": 0.3246658444404602, + "step": 227 + }, + { + "epoch": 0.06, + "learning_rate": 1.990732874345359e-06, + "loss": 0.6458, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -65.16453552246094, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -114.47259521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11645355075597763, + "rewards_train/margins": 0.7808059826493263, + "rewards_train/rejected": -0.897259533405304, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -1.7764209508895874, + "logps_train/ref_chosen": -1.609375, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -5.793430328369141, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01670459471642971, + "rewards_train/margins": -0.04361156187951565, + "rewards_train/rejected": 0.026906967163085938, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -91.44624328613281, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -156.18450927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05537567287683487, + "rewards_train/margins": 0.4738265946507454, + "rewards_train/rejected": -0.4184509217739105, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -117.56563568115234, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -139.09689331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0065635680221021175, + "rewards_train/margins": 0.20312576601281762, + "rewards_train/rejected": -0.20968933403491974, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -9.32722282409668, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -1.3828125, + "logps_train/rejected": -4.708657264709473, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07977771759033203, + "rewards_train/margins": 0.4123621881008148, + "rewards_train/rejected": -0.3325844705104828, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -103.89094543457031, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -120.82473754882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21090546250343323, + "rewards_train/margins": 1.0433792173862457, + "rewards_train/rejected": -0.8324737548828125, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -89.19352722167969, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -116.35346221923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2306472808122635, + "rewards_train/margins": 0.3159935027360916, + "rewards_train/rejected": -0.08534622192382812, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -23.55805015563965, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -20.003860473632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1183050200343132, + "rewards_train/margins": -0.15541897341609, + "rewards_train/rejected": 0.03711395338177681, + "step": 228 + }, + { + "epoch": 0.06, + "logps_train/chosen": -81.72944641113281, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -53.45005798339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02705535851418972, + "rewards_train/margins": 0.07206115685403347, + "rewards_train/rejected": -0.04500579833984375, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -10.589803695678711, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -38.737361907958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07773037254810333, + "rewards_train/margins": 0.021005816757678986, + "rewards_train/rejected": -0.09873618930578232, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -103.02155303955078, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.68655395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29784470796585083, + "rewards_train/margins": 0.2665001042187214, + "rewards_train/rejected": 0.03134460374712944, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -95.04671478271484, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -91.28815460205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3546714782714844, + "rewards_train/margins": -0.07585600018501282, + "rewards_train/rejected": -0.27881547808647156, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -23.203182220458984, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -11.981354713439941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04218177869915962, + "rewards_train/margins": 0.12781725451350212, + "rewards_train/rejected": -0.0856354758143425, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -3.8462350368499756, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -5.230706691741943, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04087350517511368, + "rewards_train/margins": 0.016572166234254837, + "rewards_train/rejected": -0.057445671409368515, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -159.7092742919922, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -166.570068359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3709274232387543, + "rewards_train/margins": -0.613920584321022, + "rewards_train/rejected": 0.24299316108226776, + "step": 229 + }, + { + "epoch": 0.06, + "logps_train/chosen": -99.94949340820312, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -95.46122741699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15505066514015198, + "rewards_train/margins": 0.7011733949184418, + "rewards_train/rejected": -0.5461227297782898, + "step": 229 + }, + { + "epoch": 0.06, + "learning_rate": 1.99037007623783e-06, + "loss": 0.6071, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -64.83628845214844, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -130.1903839111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.166371151804924, + "rewards_train/margins": 0.7354095429182053, + "rewards_train/rejected": -0.5690383911132812, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.2063493728637695, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -5.128539085388184, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0050099375657737255, + "rewards_train/margins": 0.004718970973044634, + "rewards_train/rejected": -0.00972890853881836, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -89.37403869628906, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -105.37593078613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06259613484144211, + "rewards_train/margins": 0.6001892015337944, + "rewards_train/rejected": -0.5375930666923523, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -8.070656776428223, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -7.69398307800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0008156776311807334, + "rewards_train/margins": 0.043582630169112235, + "rewards_train/rejected": -0.04439830780029297, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.6691675186157227, + "logps_train/ref_chosen": -0.6796875, + "logps_train/ref_rejected": -0.6796875, + "logps_train/rejected": -0.7297990322113037, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00105199811514467, + "rewards_train/margins": 0.006063151522539556, + "rewards_train/rejected": -0.005011153407394886, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -168.81735229492188, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -138.08685302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11826477199792862, + "rewards_train/margins": 0.4269500747323036, + "rewards_train/rejected": -0.308685302734375, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -19.773439407348633, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -4.337430477142334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010156059637665749, + "rewards_train/margins": 0.06421160884201527, + "rewards_train/rejected": -0.05405554920434952, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -113.71519470214844, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -128.52374267578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02151947095990181, + "rewards_train/margins": -0.06914520263671875, + "rewards_train/rejected": 0.04762573167681694, + "step": 230 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.268853187561035, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -9.399178504943848, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038739681243896484, + "rewards_train/margins": 0.09115753322839737, + "rewards_train/rejected": -0.052417851984500885, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -5.467856407165527, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -16.489967346191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 8.93592878128402e-05, + "rewards_train/margins": -0.01341390627931105, + "rewards_train/rejected": 0.01350326556712389, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -15.063528060913086, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -37.243221282958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018852805718779564, + "rewards_train/margins": 0.055469321087002754, + "rewards_train/rejected": -0.07432212680578232, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -0.30013906955718994, + "logps_train/ref_chosen": -0.30859375, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -3.1981916427612305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0008454680792056024, + "rewards_train/margins": -0.02777286764467135, + "rewards_train/rejected": 0.028618335723876953, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -113.69426727294922, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -163.52249145507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.030573273077607155, + "rewards_train/margins": 0.38282241858541965, + "rewards_train/rejected": -0.3522491455078125, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -15.348182678222656, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -13.169661521911621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00893173273652792, + "rewards_train/margins": 0.007147884927690029, + "rewards_train/rejected": 0.0017838478088378906, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -62.33338165283203, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -14.168052673339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2666618525981903, + "rewards_train/margins": 0.4772171229124069, + "rewards_train/rejected": -0.2105552703142166, + "step": 231 + }, + { + "epoch": 0.06, + "logps_train/chosen": -105.4774398803711, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -111.03759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09774398803710938, + "rewards_train/margins": 0.8560158014297485, + "rewards_train/rejected": -0.9537597894668579, + "step": 231 + }, + { + "epoch": 0.06, + "learning_rate": 1.9900003465897977e-06, + "loss": 0.5959, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -25.94780921936035, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -31.615638732910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017719078809022903, + "rewards_train/margins": 0.204282958060503, + "rewards_train/rejected": -0.1865638792514801, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -77.49131774902344, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -95.7835693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04913177713751793, + "rewards_train/margins": 0.47922519221901894, + "rewards_train/rejected": -0.5283569693565369, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -74.8439712524414, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -127.87036895751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4656028747558594, + "rewards_train/margins": 1.4026398062705994, + "rewards_train/rejected": -0.93703693151474, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -170.51004028320312, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -140.98544311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25100404024124146, + "rewards_train/margins": 0.24754026532173157, + "rewards_train/rejected": -0.498544305562973, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -98.60625457763672, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -100.8531494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.060625459998846054, + "rewards_train/margins": 0.9246895052492619, + "rewards_train/rejected": -0.9853149652481079, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -112.56047058105469, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -116.8448486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10604705661535263, + "rewards_train/margins": 0.028437815606594086, + "rewards_train/rejected": -0.13448487222194672, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.187601566314697, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -22.09453010559082, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.021864844486117363, + "rewards_train/margins": -0.006182145327329636, + "rewards_train/rejected": 0.028046989813447, + "step": 232 + }, + { + "epoch": 0.06, + "logps_train/chosen": -4.602804660797119, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -8.391631126403809, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07434296607971191, + "rewards_train/margins": -0.07267985341604799, + "rewards_train/rejected": -0.0016631126636639237, + "step": 232 + }, + { + "epoch": 0.07, + "logps_train/chosen": -70.88542938232422, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -73.17072296142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2885429561138153, + "rewards_train/margins": 0.028529345989227295, + "rewards_train/rejected": -0.3170723021030426, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -107.53369140625, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -91.70244598388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05336914211511612, + "rewards_train/margins": 1.1668755039572716, + "rewards_train/rejected": -1.2202446460723877, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -97.75885772705078, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -151.91659545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07588577270507812, + "rewards_train/margins": 0.6157737970352173, + "rewards_train/rejected": -0.6916595697402954, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -8.716401100158691, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -8.57673168182373, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0841401144862175, + "rewards_train/margins": 0.014158055186271667, + "rewards_train/rejected": -0.09829816967248917, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -8.847907066345215, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -14.353968620300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07854070514440536, + "rewards_train/margins": 0.08810616284608841, + "rewards_train/rejected": -0.16664686799049377, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -102.378662109375, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -164.03579711914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4378662109375, + "rewards_train/margins": 0.6657135486602783, + "rewards_train/rejected": -1.1035797595977783, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.5842416286468506, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -7.718934059143066, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02751333825290203, + "rewards_train/margins": 0.1306567471474409, + "rewards_train/rejected": -0.10314340889453888, + "step": 233 + }, + { + "epoch": 0.07, + "logps_train/chosen": -103.33485412597656, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -124.36349487304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5834854245185852, + "rewards_train/margins": -0.3471359312534332, + "rewards_train/rejected": -0.23634949326515198, + "step": 233 + }, + { + "epoch": 0.07, + "learning_rate": 1.9896236879889788e-06, + "loss": 0.56, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -108.76472473144531, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -156.1359405517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22647248208522797, + "rewards_train/margins": 1.087121620774269, + "rewards_train/rejected": -1.313594102859497, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -5.005396366119385, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -4.665433406829834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018210364505648613, + "rewards_train/margins": 0.03162870556116104, + "rewards_train/rejected": -0.013418341055512428, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -36.971771240234375, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -3.700422763824463, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19717712700366974, + "rewards_train/margins": -0.14275984838604927, + "rewards_train/rejected": -0.05441727861762047, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -104.68367004394531, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -119.013916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08163299411535263, + "rewards_train/margins": 0.33302461355924606, + "rewards_train/rejected": -0.25139161944389343, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -102.94698333740234, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -117.30111694335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1446983367204666, + "rewards_train/margins": -0.46458666026592255, + "rewards_train/rejected": 0.31988832354545593, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -51.46302032470703, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -19.767980575561523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1286979764699936, + "rewards_train/margins": 0.2554960399866104, + "rewards_train/rejected": -0.12679806351661682, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -19.958053588867188, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -12.709598541259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06669463962316513, + "rewards_train/margins": 0.05015449412167072, + "rewards_train/rejected": 0.016540145501494408, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -0.4295918643474579, + "logps_train/ref_chosen": -0.48046875, + "logps_train/ref_rejected": -0.48046875, + "logps_train/rejected": -0.4438726603984833, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005087688565254211, + "rewards_train/margins": 0.0014280795585364103, + "rewards_train/rejected": 0.003659609006717801, + "step": 234 + }, + { + "epoch": 0.07, + "logps_train/chosen": -37.37590789794922, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -13.912524223327637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012409210205078125, + "rewards_train/margins": 0.1599116325378418, + "rewards_train/rejected": -0.14750242233276367, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.8097574710845947, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -4.506139755249023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04503824934363365, + "rewards_train/margins": -0.016299273818731308, + "rewards_train/rejected": -0.028738975524902344, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -108.99971008300781, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -125.20846557617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04997101053595543, + "rewards_train/margins": 0.17087555304169655, + "rewards_train/rejected": -0.22084656357765198, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -114.95166015625, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -127.25048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.204833984375, + "rewards_train/margins": 1.229882836341858, + "rewards_train/rejected": -1.025048851966858, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -76.35987854003906, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -115.57178497314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.164012148976326, + "rewards_train/margins": 0.17119064647704363, + "rewards_train/rejected": -0.00717849750071764, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.0318922996520996, + "logps_train/ref_chosen": -0.1611328125, + "logps_train/ref_rejected": -0.1611328125, + "logps_train/rejected": -1.196000337600708, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08707594871520996, + "rewards_train/margins": 0.01641080528497696, + "rewards_train/rejected": -0.10348675400018692, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -10.366623878479004, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -20.036911010742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03833761438727379, + "rewards_train/margins": 0.004528716206550598, + "rewards_train/rejected": 0.03380889818072319, + "step": 235 + }, + { + "epoch": 0.07, + "logps_train/chosen": -119.20310974121094, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -151.58682250976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07031097263097763, + "rewards_train/margins": 0.3883712962269783, + "rewards_train/rejected": -0.45868226885795593, + "step": 235 + }, + { + "epoch": 0.07, + "learning_rate": 1.989240103071583e-06, + "loss": 0.6158, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -12.164952278137207, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -9.277859687805176, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05399522930383682, + "rewards_train/margins": -0.06370926089584827, + "rewards_train/rejected": 0.009714031592011452, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.917696952819824, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -12.23452377319336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02301969565451145, + "rewards_train/margins": -0.018317318055778742, + "rewards_train/rejected": -0.00470237759873271, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -129.81256103515625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -93.65309143066406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7312561273574829, + "rewards_train/margins": -0.5159469842910767, + "rewards_train/rejected": -0.21530914306640625, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -116.78131103515625, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -178.52557373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.028131127357483, + "rewards_train/margins": 0.02442622184753418, + "rewards_train/rejected": -1.052557349205017, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -10.930233001708984, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -10.477073669433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06947670131921768, + "rewards_train/margins": 0.16718406975269318, + "rewards_train/rejected": -0.0977073684334755, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -9.032583236694336, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -80.85743713378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.059508323669433594, + "rewards_train/margins": 0.07623539865016937, + "rewards_train/rejected": -0.13574372231960297, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.377868175506592, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -20.318359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01903681829571724, + "rewards_train/margins": -0.17470088973641396, + "rewards_train/rejected": 0.15566407144069672, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -112.75450134277344, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -101.25706481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3245498836040497, + "rewards_train/margins": 0.3502563666552305, + "rewards_train/rejected": -0.02570648305118084, + "step": 236 + }, + { + "epoch": 0.07, + "logps_train/chosen": -118.3287582397461, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -116.3099365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1671241819858551, + "rewards_train/margins": 0.1981178354471922, + "rewards_train/rejected": -0.03099365346133709, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.9589877128601074, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -8.538331031799316, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.022461270913481712, + "rewards_train/margins": 0.05637183226644993, + "rewards_train/rejected": -0.07883310317993164, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -62.10062789916992, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -112.32118225097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06493721157312393, + "rewards_train/margins": 0.6470554247498512, + "rewards_train/rejected": -0.5821182131767273, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -135.57408142089844, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -114.19378662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15740814805030823, + "rewards_train/margins": 0.4619705379009247, + "rewards_train/rejected": -0.6193786859512329, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.0961098670959473, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -30.16363525390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05492348596453667, + "rewards_train/margins": -0.06355996057391167, + "rewards_train/rejected": 0.008636474609375, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -82.90930938720703, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -144.11599731445312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.040930937975645065, + "rewards_train/margins": -0.02933120634406805, + "rewards_train/rejected": -0.011599731631577015, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -83.32691955566406, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -66.3271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11730804294347763, + "rewards_train/margins": 0.200022891163826, + "rewards_train/rejected": -0.08271484822034836, + "step": 237 + }, + { + "epoch": 0.07, + "logps_train/chosen": -18.107587814331055, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -27.200883865356445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02674121968448162, + "rewards_train/margins": 0.021829606033861637, + "rewards_train/rejected": 0.004911613650619984, + "step": 237 + }, + { + "epoch": 0.07, + "learning_rate": 1.9888495945222988e-06, + "loss": 0.6604, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.248911380767822, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -8.213729858398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07332863658666611, + "rewards_train/margins": -0.08320565056055784, + "rewards_train/rejected": 0.009877013973891735, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -188.24929809570312, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -264.0, + "logps_train/rejected": -265.3253173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07507019490003586, + "rewards_train/margins": 0.20760194212198257, + "rewards_train/rejected": -0.13253174722194672, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -122.98846435546875, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -157.36553955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4488464295864105, + "rewards_train/margins": 1.1877075731754303, + "rewards_train/rejected": -1.6365540027618408, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -16.524572372436523, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -11.809843063354492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14620724320411682, + "rewards_train/margins": 0.03477706015110016, + "rewards_train/rejected": -0.18098430335521698, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.810258388519287, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -11.13181209564209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02008833922445774, + "rewards_train/margins": 0.018092872574925423, + "rewards_train/rejected": -0.03818121179938316, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -96.74788665771484, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -110.99241638183594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.024788737297058, + "rewards_train/margins": -0.1755470633506775, + "rewards_train/rejected": -0.8492416739463806, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -7.738929748535156, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -9.926016807556152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08860702812671661, + "rewards_train/margins": -0.025041289627552032, + "rewards_train/rejected": 0.11364831775426865, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -12.761004447937012, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -25.69080352783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03860044479370117, + "rewards_train/margins": 0.15547990798950195, + "rewards_train/rejected": -0.19408035278320312, + "step": 238 + }, + { + "epoch": 0.07, + "logps_train/chosen": -63.18385314941406, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -41.09037399291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05661468580365181, + "rewards_train/margins": 0.040652085095644, + "rewards_train/rejected": 0.015962600708007812, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -5.416662216186523, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -22.906461715698242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057291220873594284, + "rewards_train/margins": 0.10835495963692665, + "rewards_train/rejected": -0.16564618051052094, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -96.04115295410156, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -104.89141845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10411529988050461, + "rewards_train/margins": 0.03502654284238815, + "rewards_train/rejected": -0.13914184272289276, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.930730104446411, + "logps_train/ref_chosen": -1.46875, + "logps_train/ref_rejected": -0.33984375, + "logps_train/rejected": -5.524184703826904, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14619801938533783, + "rewards_train/margins": 0.37223608791828156, + "rewards_train/rejected": -0.5184341073036194, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -64.974365234375, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -106.84451293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19743652641773224, + "rewards_train/margins": 0.837014839053154, + "rewards_train/rejected": -1.0344513654708862, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.5196266174316406, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -14.403566360473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020712662488222122, + "rewards_train/margins": 0.05089397355914116, + "rewards_train/rejected": -0.07160663604736328, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -107.85466766357422, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -105.26094818115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48546677827835083, + "rewards_train/margins": 0.1906280517578125, + "rewards_train/rejected": -0.6760948300361633, + "step": 239 + }, + { + "epoch": 0.07, + "logps_train/chosen": -63.93157196044922, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -72.80253601074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.40684279799461365, + "rewards_train/margins": 0.037096381187438965, + "rewards_train/rejected": 0.3697464168071747, + "step": 239 + }, + { + "epoch": 0.07, + "learning_rate": 1.9884521650742714e-06, + "loss": 0.6177, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -143.57652282714844, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -193.396728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25765228271484375, + "rewards_train/margins": 0.28202056884765625, + "rewards_train/rejected": -0.5396728515625, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -44.83396911621094, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -28.219432830810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04160308837890625, + "rewards_train/margins": 0.0010463707149028778, + "rewards_train/rejected": 0.04055671766400337, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -136.18846130371094, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -142.3214874267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11884613335132599, + "rewards_train/margins": 0.2133026272058487, + "rewards_train/rejected": -0.3321487605571747, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -90.10440063476562, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -70.88804626464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08955993503332138, + "rewards_train/margins": 0.678364597260952, + "rewards_train/rejected": -0.5888046622276306, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -83.03480529785156, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -118.19805908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2965194880962372, + "rewards_train/margins": 1.91632542014122, + "rewards_train/rejected": -1.619805932044983, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -64.78324127197266, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -118.29338073730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0716758742928505, + "rewards_train/margins": 0.7510139718651772, + "rewards_train/rejected": -0.6793380975723267, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -128.4266815185547, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -127.02316284179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15733185410499573, + "rewards_train/margins": 0.15964813833124936, + "rewards_train/rejected": -0.0023162842262536287, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -111.01715850830078, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -158.22177124023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8517159223556519, + "rewards_train/margins": -0.12953877449035645, + "rewards_train/rejected": -1.7221771478652954, + "step": 240 + }, + { + "epoch": 0.07, + "logps_train/chosen": -8.29035758972168, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -10.779826164245605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008464241400361061, + "rewards_train/margins": 0.07394685782492161, + "rewards_train/rejected": -0.06548261642456055, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.1012191772460938, + "logps_train/ref_chosen": -0.94921875, + "logps_train/ref_rejected": -0.94921875, + "logps_train/rejected": -1.109481930732727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015200043097138405, + "rewards_train/margins": 0.0008262749761343002, + "rewards_train/rejected": -0.016026318073272705, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -22.80466079711914, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -47.173004150390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11796607822179794, + "rewards_train/margins": -0.10066566243767738, + "rewards_train/rejected": -0.01730041578412056, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -5.8649091720581055, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -6.007352352142334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04788408428430557, + "rewards_train/margins": 0.16111931949853897, + "rewards_train/rejected": -0.1132352352142334, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.905390739440918, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -6.202043533325195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011023426428437233, + "rewards_train/margins": 0.06872778199613094, + "rewards_train/rejected": -0.05770435556769371, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -108.997314453125, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -171.63229370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0497314929962158, + "rewards_train/margins": 0.9134979248046875, + "rewards_train/rejected": -1.9632294178009033, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -121.2877197265625, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -116.18150329589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.47877198457717896, + "rewards_train/margins": -0.3606216534972191, + "rewards_train/rejected": -0.11815033107995987, + "step": 241 + }, + { + "epoch": 0.07, + "logps_train/chosen": -92.99087524414062, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -134.7155303955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8490875363349915, + "rewards_train/margins": 0.5724654793739319, + "rewards_train/rejected": -1.4215530157089233, + "step": 241 + }, + { + "epoch": 0.07, + "learning_rate": 1.988047817509086e-06, + "loss": 0.5751, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.525840163230896, + "logps_train/ref_chosen": -1.609375, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -13.008232116699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008353483863174915, + "rewards_train/margins": 0.0029266956262290478, + "rewards_train/rejected": 0.0054267882369458675, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -80.5443344116211, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -147.02951049804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6544334292411804, + "rewards_train/margins": 0.8485175967216492, + "rewards_train/rejected": -1.5029510259628296, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -9.082695960998535, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -12.103670120239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10201960057020187, + "rewards_train/margins": 0.008347414433956146, + "rewards_train/rejected": -0.11036701500415802, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -74.19056701660156, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -4.8051886558532715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2809433043003082, + "rewards_train/margins": 0.3270871713757515, + "rewards_train/rejected": -0.04614386707544327, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -21.842178344726562, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -8.786918640136719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1717178374528885, + "rewards_train/margins": -0.06802596896886826, + "rewards_train/rejected": -0.10369186848402023, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -14.317118644714355, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -70.4906234741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00046186448889784515, + "rewards_train/margins": 0.2486004799429793, + "rewards_train/rejected": -0.24906234443187714, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -137.17413330078125, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -116.84700012207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.317413330078125, + "rewards_train/margins": 0.36728668212890625, + "rewards_train/rejected": -0.6847000122070312, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -20.301952362060547, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -15.902066230773926, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005195236299186945, + "rewards_train/margins": -0.06498861545696855, + "rewards_train/rejected": 0.0597933791577816, + "step": 242 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.58505392074585, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -1.9453125, + "logps_train/rejected": -2.338343620300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01961960829794407, + "rewards_train/margins": 0.058922721073031425, + "rewards_train/rejected": -0.03930311277508736, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -9.21436882019043, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -4.99189567565918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1348131150007248, + "rewards_train/margins": 0.20587768405675888, + "rewards_train/rejected": -0.07106456905603409, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -85.11393737792969, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -78.14768981933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011393737979233265, + "rewards_train/margins": -0.046624758280813694, + "rewards_train/rejected": 0.03523102030158043, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -0.9133039712905884, + "logps_train/ref_chosen": -0.9921875, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -9.015398979187012, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007888353429734707, + "rewards_train/margins": 0.05317825358361006, + "rewards_train/rejected": -0.04528990015387535, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -43.97163772583008, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -36.36444091796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12216377258300781, + "rewards_train/margins": -0.11071968078613281, + "rewards_train/rejected": -0.011444091796875, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -14.329343795776367, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -23.263412475585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05456561967730522, + "rewards_train/margins": 0.08090686798095703, + "rewards_train/rejected": -0.02634124830365181, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -132.3645782470703, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -34.29341506958008, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.036457896232605, + "rewards_train/margins": -1.0571163892745972, + "rewards_train/rejected": 0.020658493041992188, + "step": 243 + }, + { + "epoch": 0.07, + "logps_train/chosen": -65.95427703857422, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -46.2225341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1545722931623459, + "rewards_train/margins": 0.30182571709156036, + "rewards_train/rejected": -0.14725342392921448, + "step": 243 + }, + { + "epoch": 0.07, + "learning_rate": 1.9876365546567466e-06, + "loss": 0.675, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -102.40321350097656, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -140.5029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009678649716079235, + "rewards_train/margins": 0.6599716423079371, + "rewards_train/rejected": -0.6502929925918579, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -119.61270141601562, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -119.63046264648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03872985765337944, + "rewards_train/margins": 0.0017761215567588806, + "rewards_train/rejected": 0.03695373609662056, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -73.79872131347656, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -70.6611557006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12012787163257599, + "rewards_train/margins": 0.4362434595823288, + "rewards_train/rejected": -0.3161155879497528, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -72.91122436523438, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -51.705810546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.15887756645679474, + "rewards_train/margins": -0.0705413818359375, + "rewards_train/rejected": 0.22941894829273224, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -74.8388442993164, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -10.199362754821777, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06611557304859161, + "rewards_train/margins": 0.20480185747146606, + "rewards_train/rejected": -0.13868628442287445, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -125.92984008789062, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -120.15802001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.392984002828598, + "rewards_train/margins": 0.7228180468082428, + "rewards_train/rejected": -1.1158020496368408, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -12.722436904907227, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -10.061573028564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05900631099939346, + "rewards_train/margins": -0.028586387634277344, + "rewards_train/rejected": 0.0875926986336708, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -75.94647979736328, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -122.99271392822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2553520202636719, + "rewards_train/margins": 1.2046234011650085, + "rewards_train/rejected": -0.9492713809013367, + "step": 244 + }, + { + "epoch": 0.07, + "logps_train/chosen": -87.78526306152344, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -166.63772583007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3285263180732727, + "rewards_train/margins": 0.5352462530136108, + "rewards_train/rejected": -0.8637725710868835, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.3378820419311523, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -1.59375, + "logps_train/rejected": -1.5330756902694702, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016600703820586205, + "rewards_train/margins": -0.02266813488677144, + "rewards_train/rejected": 0.006067431066185236, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.4263501167297363, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -15.095163345336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0011149883503094316, + "rewards_train/margins": 0.0543813236290589, + "rewards_train/rejected": -0.053266335278749466, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -92.88621520996094, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -129.98155212402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2613784968852997, + "rewards_train/margins": 0.4595337063074112, + "rewards_train/rejected": -0.1981552094221115, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -134.13771057128906, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -198.34243774414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.463771104812622, + "rewards_train/margins": 0.37047266960144043, + "rewards_train/rejected": -1.8342437744140625, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.8700156211853027, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -8.284981727600098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019248438999056816, + "rewards_train/margins": 0.05712161399424076, + "rewards_train/rejected": -0.037873174995183945, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -63.17106628417969, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -68.66644287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.132893368601799, + "rewards_train/margins": 0.349537655711174, + "rewards_train/rejected": -0.216644287109375, + "step": 245 + }, + { + "epoch": 0.07, + "logps_train/chosen": -149.77560424804688, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -122.59515380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.477560430765152, + "rewards_train/margins": 0.5819549262523651, + "rewards_train/rejected": -1.059515357017517, + "step": 245 + }, + { + "epoch": 0.07, + "learning_rate": 1.9872183793956576e-06, + "loss": 0.5497, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.567519187927246, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -23.2580623626709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11199808120727539, + "rewards_train/margins": 0.33780431747436523, + "rewards_train/rejected": -0.22580623626708984, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -148.39755249023438, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -146.78909301757812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0397552251815796, + "rewards_train/margins": -0.4608458876609802, + "rewards_train/rejected": -0.5789093375205994, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -27.678478240966797, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -17.869953155517578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1303478330373764, + "rewards_train/margins": -0.018352515995502472, + "rewards_train/rejected": -0.11199531704187393, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -5.659552097320557, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -6.869545936584473, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021544789895415306, + "rewards_train/margins": 0.07412438280880451, + "rewards_train/rejected": -0.052579592913389206, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -151.54928588867188, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -142.58546447753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04507141187787056, + "rewards_train/margins": 1.0036178715527058, + "rewards_train/rejected": -0.9585464596748352, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -69.93302154541016, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -244.0, + "logps_train/rejected": -256.11199951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4066978394985199, + "rewards_train/margins": 1.6178978383541107, + "rewards_train/rejected": -1.2111999988555908, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.30282735824585, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -6.615748405456543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0353422649204731, + "rewards_train/margins": 0.0719171054661274, + "rewards_train/rejected": -0.0365748405456543, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -88.56904602050781, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -88.45427703857422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.24309539794921875, + "rewards_train/margins": -0.011476904153823853, + "rewards_train/rejected": 0.2545723021030426, + "step": 246 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.5479581356048584, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -2.8112316131591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01104581356048584, + "rewards_train/margins": -0.014297652291134, + "rewards_train/rejected": 0.00325183873064816, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.295046329498291, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -12.861106872558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.001379632973112166, + "rewards_train/margins": 0.05973105353768915, + "rewards_train/rejected": -0.061110686510801315, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -118.65361785888672, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -128.93597412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5653617978096008, + "rewards_train/margins": 0.3782356381416321, + "rewards_train/rejected": -0.9435974359512329, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -88.933837890625, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -139.4864501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8433837890625, + "rewards_train/margins": 0.70526123046875, + "rewards_train/rejected": -1.54864501953125, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -44.502262115478516, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -79.02278137207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17522621154785156, + "rewards_train/margins": 0.7270519137382507, + "rewards_train/rejected": -0.9022781252861023, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -136.13763427734375, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -114.60032653808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31376343965530396, + "rewards_train/margins": 0.24626922607421875, + "rewards_train/rejected": -0.5600326657295227, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -12.351173400878906, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -15.01591682434082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002382660051807761, + "rewards_train/margins": 0.07897434546612203, + "rewards_train/rejected": -0.07659168541431427, + "step": 247 + }, + { + "epoch": 0.07, + "logps_train/chosen": -280.3028259277344, + "logps_train/ref_chosen": -268.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -163.90939331054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2302826642990112, + "rewards_train/margins": -0.6393433213233948, + "rewards_train/rejected": -0.5909393429756165, + "step": 247 + }, + { + "epoch": 0.07, + "learning_rate": 1.986793294652602e-06, + "loss": 0.6046, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -143.26296997070312, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -157.3165283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22629700601100922, + "rewards_train/margins": 0.7053558379411697, + "rewards_train/rejected": -0.931652843952179, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.301754474639893, + "logps_train/ref_chosen": -0.77734375, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -3.8312034606933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5524410605430603, + "rewards_train/margins": -0.9380707144737244, + "rewards_train/rejected": 0.38562965393066406, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.328827857971191, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -7.100771903991699, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004617214202880859, + "rewards_train/margins": 0.02094440534710884, + "rewards_train/rejected": -0.01632719114422798, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -78.59941101074219, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -115.82684326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10994110256433487, + "rewards_train/margins": 1.0727431997656822, + "rewards_train/rejected": -1.182684302330017, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -170.73724365234375, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -42.57036209106445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07372436672449112, + "rewards_train/margins": 0.03331184387207031, + "rewards_train/rejected": -0.10703621059656143, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -15.16914176940918, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -7.034635543823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05183582380414009, + "rewards_train/margins": 0.08967437967658043, + "rewards_train/rejected": -0.03783855587244034, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -9.876888275146484, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -12.689422607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06231117248535156, + "rewards_train/margins": 0.06250343323335983, + "rewards_train/rejected": -0.0001922607480082661, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -139.90097045898438, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -189.7329559326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7900970578193665, + "rewards_train/margins": 0.3831985592842102, + "rewards_train/rejected": -1.1732956171035767, + "step": 248 + }, + { + "epoch": 0.07, + "logps_train/chosen": -83.86286163330078, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -128.2191925048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08628616482019424, + "rewards_train/margins": 1.185633085668087, + "rewards_train/rejected": -1.2719192504882812, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -76.11466979980469, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -98.23492431640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3114669919013977, + "rewards_train/margins": -0.18797455728054047, + "rewards_train/rejected": -0.12349243462085724, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -67.09046173095703, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -143.9336395263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00904617365449667, + "rewards_train/margins": 1.48431780282408, + "rewards_train/rejected": -1.4933639764785767, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -108.9356689453125, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -141.87765502929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4935669004917145, + "rewards_train/margins": 1.9941986501216888, + "rewards_train/rejected": -2.4877655506134033, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.325558662414551, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -6.583403587341309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03619413450360298, + "rewards_train/margins": 0.08203449472784996, + "rewards_train/rejected": -0.04584036022424698, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -0.3778384327888489, + "logps_train/ref_chosen": -0.390625, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -2.1575376987457275, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.001278656767681241, + "rewards_train/margins": -0.036092572612687945, + "rewards_train/rejected": 0.037371229380369186, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -124.8892822265625, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -136.52276611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03892822191119194, + "rewards_train/margins": 1.013348389416933, + "rewards_train/rejected": -1.052276611328125, + "step": 249 + }, + { + "epoch": 0.07, + "logps_train/chosen": -0.5188494920730591, + "logps_train/ref_chosen": -0.388671875, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -17.550857543945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.013017761521041393, + "rewards_train/margins": -0.14543201308697462, + "rewards_train/rejected": 0.13241425156593323, + "step": 249 + }, + { + "epoch": 0.07, + "learning_rate": 1.9863613034027223e-06, + "loss": 0.5634, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -145.42771911621094, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -130.990966796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6427718997001648, + "rewards_train/margins": -0.3436752259731293, + "rewards_train/rejected": -0.2990966737270355, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -134.19699096679688, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -141.0281982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.380300909280777, + "rewards_train/margins": 1.1831207573413849, + "rewards_train/rejected": -0.8028198480606079, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -10.969775199890137, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -13.422904968261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009272480383515358, + "rewards_train/margins": 0.007812977186404169, + "rewards_train/rejected": 0.0014595031971111894, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -13.065841674804688, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -32.077205657958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04341583326458931, + "rewards_train/margins": 0.11363640055060387, + "rewards_train/rejected": -0.07022056728601456, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -51.74342346191406, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -34.331504821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0006576538435183465, + "rewards_train/margins": 0.39630813006078824, + "rewards_train/rejected": -0.3956504762172699, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -35.6307373046875, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -57.38311767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03807372972369194, + "rewards_train/margins": 0.12523803487420082, + "rewards_train/rejected": -0.16331176459789276, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -134.98712158203125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -87.97952270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.30128785967826843, + "rewards_train/margins": 0.29924013023264706, + "rewards_train/rejected": 0.0020477294456213713, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.886034965515137, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -5.162325859069824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06360349804162979, + "rewards_train/margins": 0.03387908637523651, + "rewards_train/rejected": -0.0974825844168663, + "step": 250 + }, + { + "epoch": 0.07, + "logps_train/chosen": -12.909346580505371, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -30.46630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14093466103076935, + "rewards_train/margins": 0.005696207284927368, + "rewards_train/rejected": -0.14663086831569672, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -111.49618530273438, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -146.11392211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15038147568702698, + "rewards_train/margins": 0.06177368760108948, + "rewards_train/rejected": 0.0886077880859375, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -105.49810791015625, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -104.78678894042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2998107969760895, + "rewards_train/margins": 0.2788681089878082, + "rewards_train/rejected": -0.5786789059638977, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -13.803317070007324, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -9.170404434204102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08841829746961594, + "rewards_train/margins": 0.24608374387025833, + "rewards_train/rejected": -0.1576654464006424, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.8324875831604, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -6.009841442108154, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01675124280154705, + "rewards_train/margins": 0.13961038552224636, + "rewards_train/rejected": -0.12285914272069931, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.959364891052246, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -13.408291816711426, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07906351238489151, + "rewards_train/margins": 0.17614269256591797, + "rewards_train/rejected": -0.09707918018102646, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -17.969118118286133, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -18.9180965423584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05941181257367134, + "rewards_train/margins": 0.03239784017205238, + "rewards_train/rejected": -0.09180965274572372, + "step": 251 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.5557703971862793, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -12.024057388305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01786046102643013, + "rewards_train/margins": 0.20776620879769325, + "rewards_train/rejected": -0.18990574777126312, + "step": 251 + }, + { + "epoch": 0.07, + "learning_rate": 1.9859224086694994e-06, + "loss": 0.6157, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -116.1240234375, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -130.53204345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23759765923023224, + "rewards_train/margins": 0.6408020108938217, + "rewards_train/rejected": -0.4032043516635895, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -112.00849914550781, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -13.247207641601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0008499145624227822, + "rewards_train/margins": 0.08012084959773347, + "rewards_train/rejected": -0.08097076416015625, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -36.90467834472656, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -34.41476058959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05953216552734375, + "rewards_train/margins": 0.05100822448730469, + "rewards_train/rejected": 0.008523941040039062, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.616717338562012, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -8.238061904907227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016453266143798828, + "rewards_train/margins": 0.055884458124637604, + "rewards_train/rejected": -0.039431191980838776, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -69.31275177001953, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -110.86688232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018724823370575905, + "rewards_train/margins": 0.4054130557924509, + "rewards_train/rejected": -0.386688232421875, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -148.45404052734375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -118.4952392578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.045404076576233, + "rewards_train/margins": -0.29588013887405396, + "rewards_train/rejected": -0.749523937702179, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -134.18130493164062, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -146.13099670410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.281869500875473, + "rewards_train/margins": -0.005030840635299683, + "rewards_train/rejected": 0.2869003415107727, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.012319564819336, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -10.848652839660645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.039393045008182526, + "rewards_train/margins": -0.0382416695356369, + "rewards_train/rejected": 0.07763471454381943, + "step": 252 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.927145004272461, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -20.688913345336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007285499479621649, + "rewards_train/margins": 0.013676834292709827, + "rewards_train/rejected": -0.006391334813088179, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -141.73904418945312, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -145.41586303710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5739044547080994, + "rewards_train/margins": 0.767681896686554, + "rewards_train/rejected": -1.3415863513946533, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -184.56246948242188, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -191.98655700683594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3562469482421875, + "rewards_train/margins": -0.05759119987487793, + "rewards_train/rejected": -1.2986557483673096, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -7.114832878112793, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -21.70761489868164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10210829228162766, + "rewards_train/margins": -0.031346797943115234, + "rewards_train/rejected": -0.07076149433851242, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -7.430641174316406, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -14.969076156616211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.033689118921756744, + "rewards_train/margins": -0.03678150335326791, + "rewards_train/rejected": 0.0030923844315111637, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.0558223724365234, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -1.7265625, + "logps_train/rejected": -2.057913064956665, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025667762383818626, + "rewards_train/margins": 0.05880281887948513, + "rewards_train/rejected": -0.033135056495666504, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -111.56842803955078, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -129.67723083496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1431571990251541, + "rewards_train/margins": 1.110880270600319, + "rewards_train/rejected": -0.9677230715751648, + "step": 253 + }, + { + "epoch": 0.07, + "logps_train/chosen": -178.943115234375, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -132.2248992919922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3943115174770355, + "rewards_train/margins": -0.37182158790528774, + "rewards_train/rejected": -0.02248992957174778, + "step": 253 + }, + { + "epoch": 0.07, + "learning_rate": 1.985476613524731e-06, + "loss": 0.6399, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.771900177001953, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -8.98590087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029059981927275658, + "rewards_train/margins": 0.0964000727981329, + "rewards_train/rejected": -0.06734009087085724, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -24.67724609375, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -12.741199493408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06772460788488388, + "rewards_train/margins": 0.10014534741640091, + "rewards_train/rejected": -0.1678699553012848, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -16.733129501342773, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -19.82546615600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.051687050610780716, + "rewards_train/margins": 0.159233670681715, + "rewards_train/rejected": -0.1075466200709343, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -99.28912353515625, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -100.82078552246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.021087646484375, + "rewards_train/margins": -0.04683380573987961, + "rewards_train/rejected": 0.06792145222425461, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.128705024719238, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -26.13608169555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0027544975746423006, + "rewards_train/margins": 0.11636267160065472, + "rewards_train/rejected": -0.11360817402601242, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -88.41097259521484, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -125.1595687866211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34109726548194885, + "rewards_train/margins": 0.6248596012592316, + "rewards_train/rejected": -0.9659568667411804, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.269218444824219, + "logps_train/ref_chosen": -0.140625, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -27.971500396728516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41285935044288635, + "rewards_train/margins": -0.5782093107700348, + "rewards_train/rejected": 0.16534996032714844, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.65867805480957, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -19.76651954650879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03725719451904297, + "rewards_train/margins": 0.15140914916992188, + "rewards_train/rejected": -0.1141519546508789, + "step": 254 + }, + { + "epoch": 0.07, + "logps_train/chosen": -130.09078979492188, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -169.26132202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19092102348804474, + "rewards_train/margins": 0.3170532286167145, + "rewards_train/rejected": -0.12613220512866974, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -63.80434799194336, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -63.70744323730469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05543480068445206, + "rewards_train/margins": -0.009690474718809128, + "rewards_train/rejected": -0.04574432596564293, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -37.92279052734375, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -139.01846313476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16727904975414276, + "rewards_train/margins": 1.4845672398805618, + "rewards_train/rejected": -1.6518462896347046, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -35.635231018066406, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -3.3950483798980713, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.061476897448301315, + "rewards_train/margins": 0.07285673543810844, + "rewards_train/rejected": -0.011379837989807129, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -131.4995880126953, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -164.353759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8999587893486023, + "rewards_train/margins": 1.0354172587394714, + "rewards_train/rejected": -1.9353760480880737, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.4404336214065552, + "logps_train/ref_chosen": -1.28125, + "logps_train/ref_rejected": -1.1953125, + "logps_train/rejected": -1.0779553651809692, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015918362885713577, + "rewards_train/margins": -0.027654076926410198, + "rewards_train/rejected": 0.011735714040696621, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -121.5345230102539, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -153.35491943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.003452301025390625, + "rewards_train/margins": 0.5320396423339844, + "rewards_train/rejected": -0.535491943359375, + "step": 255 + }, + { + "epoch": 0.07, + "logps_train/chosen": -111.42672729492188, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -141.44387817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4426727294921875, + "rewards_train/margins": 0.8017151355743408, + "rewards_train/rejected": -1.2443878650665283, + "step": 255 + }, + { + "epoch": 0.07, + "learning_rate": 1.9850239210885106e-06, + "loss": 0.5801, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -105.48136901855469, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -149.78729248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7481369376182556, + "rewards_train/margins": 1.0305923819541931, + "rewards_train/rejected": -1.7787293195724487, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -152.7983856201172, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -214.0, + "logps_train/rejected": -221.93760681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07983856648206711, + "rewards_train/margins": 0.7139221504330635, + "rewards_train/rejected": -0.7937607169151306, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -16.922279357910156, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -19.987157821655273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.057772066444158554, + "rewards_train/margins": 0.04398784879595041, + "rewards_train/rejected": 0.013784217648208141, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -167.8399200439453, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -170.62728881835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0839920043945312, + "rewards_train/margins": 0.07873690128326416, + "rewards_train/rejected": -1.1627289056777954, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -13.834457397460938, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -10.741498947143555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17280426621437073, + "rewards_train/margins": 0.2844541594386101, + "rewards_train/rejected": -0.11164989322423935, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.812928199768066, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -19.25774574279785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05620718002319336, + "rewards_train/margins": 0.1319817528128624, + "rewards_train/rejected": -0.07577457278966904, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -15.223466873168945, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -33.8668212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0160966869443655, + "rewards_train/margins": 0.07058544643223286, + "rewards_train/rejected": -0.08668213337659836, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -172.80752563476562, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -157.78939819335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.280752569437027, + "rewards_train/margins": -0.40181275457143784, + "rewards_train/rejected": 0.12106018513441086, + "step": 256 + }, + { + "epoch": 0.07, + "logps_train/chosen": -76.03279876708984, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -116.60438537597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7532798647880554, + "rewards_train/margins": 0.7071587443351746, + "rewards_train/rejected": -1.46043860912323, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -96.35509490966797, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -111.0364990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11449050903320312, + "rewards_train/margins": 0.81814044713974, + "rewards_train/rejected": -0.7036499381065369, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -145.31024169921875, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -87.9468002319336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03102417103946209, + "rewards_train/margins": -0.03634414775297046, + "rewards_train/rejected": 0.0053199767135083675, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -58.20267105102539, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -7.643102169036865, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14526711404323578, + "rewards_train/margins": -0.1590818976983428, + "rewards_train/rejected": 0.013814783655107021, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -56.613067626953125, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -68.03469848632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06369324028491974, + "rewards_train/margins": 0.11716308817267418, + "rewards_train/rejected": -0.05346984788775444, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.499040126800537, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -4.3482160568237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012595987878739834, + "rewards_train/margins": 0.03179259318858385, + "rewards_train/rejected": -0.019196605309844017, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -34.85139465332031, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -34.09492492675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01486053504049778, + "rewards_train/margins": 0.286853039637208, + "rewards_train/rejected": -0.2719925045967102, + "step": 257 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.962792992591858, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -0.859375, + "logps_train/rejected": -0.877230703830719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013095701113343239, + "rewards_train/margins": 0.014881271519698203, + "rewards_train/rejected": -0.0017855704063549638, + "step": 257 + }, + { + "epoch": 0.07, + "learning_rate": 1.9845643345292055e-06, + "loss": 0.6004, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -75.92401885986328, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -141.2587432861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10759811848402023, + "rewards_train/margins": 1.6334724947810173, + "rewards_train/rejected": -1.525874376296997, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -25.211395263671875, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -101.06511688232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01636047475039959, + "rewards_train/margins": 0.7228721510618925, + "rewards_train/rejected": -0.7065116763114929, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -82.32833099365234, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -94.1579818725586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5328330993652344, + "rewards_train/margins": -0.41703490912914276, + "rewards_train/rejected": -0.11579819023609161, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -57.98926544189453, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -62.650115966796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0010734557872638106, + "rewards_train/margins": -0.03391494753304869, + "rewards_train/rejected": 0.0349884033203125, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.782097816467285, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -8.107943534851074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09679021686315536, + "rewards_train/margins": 0.23258457332849503, + "rewards_train/rejected": -0.13579435646533966, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -120.7962646484375, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -86.67965698242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07037353515625, + "rewards_train/margins": 0.1883392333984375, + "rewards_train/rejected": -0.1179656982421875, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -25.783130645751953, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -30.5, + "logps_train/rejected": -31.486751556396484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04668693616986275, + "rewards_train/margins": 0.14536209031939507, + "rewards_train/rejected": -0.09867515414953232, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -24.012285232543945, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -33.88351821899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04877147823572159, + "rewards_train/margins": 0.2996233180165291, + "rewards_train/rejected": -0.2508518397808075, + "step": 258 + }, + { + "epoch": 0.07, + "logps_train/chosen": -196.42486572265625, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -188.7649383544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.842486560344696, + "rewards_train/margins": 0.03400731086730957, + "rewards_train/rejected": -0.8764938712120056, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -145.69705200195312, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -139.17718505859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06970520317554474, + "rewards_train/margins": -0.05198669619858265, + "rewards_train/rejected": -0.01771850697696209, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -113.31389617919922, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -139.05238342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6813896298408508, + "rewards_train/margins": 1.2238487601280212, + "rewards_train/rejected": -1.905238389968872, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -101.96285247802734, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -149.9169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.40371474623680115, + "rewards_train/margins": 1.595414012670517, + "rewards_train/rejected": -1.1916992664337158, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -10.77204418182373, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -19.039451599121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014704418368637562, + "rewards_train/margins": 0.0017407415434718132, + "rewards_train/rejected": -0.016445159912109375, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -125.94329833984375, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -154.72430419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3056701719760895, + "rewards_train/margins": 1.7781006395816803, + "rewards_train/rejected": -1.4724304676055908, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -105.75259399414062, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -140.91867065429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7752594351768494, + "rewards_train/margins": 1.066607654094696, + "rewards_train/rejected": -1.8418670892715454, + "step": 259 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.421835422515869, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -64.07737731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.040621042251586914, + "rewards_train/margins": 0.11711668968200684, + "rewards_train/rejected": -0.15773773193359375, + "step": 259 + }, + { + "epoch": 0.07, + "learning_rate": 1.984097857063434e-06, + "loss": 0.5117, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -69.18799591064453, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -106.5115737915039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018799591809511185, + "rewards_train/margins": 0.38235778734087944, + "rewards_train/rejected": -0.4011573791503906, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -134.61874389648438, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -156.03460693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4118744134902954, + "rewards_train/margins": 1.1915863752365112, + "rewards_train/rejected": -2.6034607887268066, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -176.40313720703125, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -238.0, + "logps_train/rejected": -247.04701232910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05968628078699112, + "rewards_train/margins": 0.9643875136971474, + "rewards_train/rejected": -0.9047012329101562, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -99.5117416381836, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -119.88922119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3011741638183594, + "rewards_train/margins": 0.0877479612827301, + "rewards_train/rejected": -0.3889221251010895, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.651872396469116, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -8.511962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0441877618432045, + "rewards_train/margins": 0.12350904941558838, + "rewards_train/rejected": -0.07932128757238388, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -106.52000427246094, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -218.65052795410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2020004242658615, + "rewards_train/margins": 1.7630524188280106, + "rewards_train/rejected": -1.965052843093872, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.411933422088623, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -9.216156959533691, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1130683422088623, + "rewards_train/margins": 0.014797359704971313, + "rewards_train/rejected": -0.12786570191383362, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -113.12450408935547, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -98.11327362060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.062450408935546875, + "rewards_train/margins": 0.04887695610523224, + "rewards_train/rejected": -0.11132736504077911, + "step": 260 + }, + { + "epoch": 0.07, + "logps_train/chosen": -6.835183143615723, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -9.705450057983398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.028981685638427734, + "rewards_train/margins": -0.025473307818174362, + "rewards_train/rejected": 0.0544549934566021, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -118.96990203857422, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -136.9869384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4469902217388153, + "rewards_train/margins": 0.05170363187789917, + "rewards_train/rejected": -0.4986938536167145, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -21.353694915771484, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -5.100181579589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10213050991296768, + "rewards_train/margins": 0.12777366861701012, + "rewards_train/rejected": -0.025643158704042435, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -23.671398162841797, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -61.533695220947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029639815911650658, + "rewards_train/margins": 0.4237297121435404, + "rewards_train/rejected": -0.45336952805519104, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -142.70806884765625, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -111.10226440429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4291931092739105, + "rewards_train/margins": 0.08941954374313354, + "rewards_train/rejected": 0.339773565530777, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -74.47282409667969, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -101.94140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09728240966796875, + "rewards_train/margins": 0.3468582332134247, + "rewards_train/rejected": -0.44414064288139343, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -88.55209350585938, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -120.19873809814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40520936250686646, + "rewards_train/margins": 0.9146644473075867, + "rewards_train/rejected": -1.3198738098144531, + "step": 261 + }, + { + "epoch": 0.07, + "logps_train/chosen": -46.702484130859375, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -65.41069793701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.32024842500686646, + "rewards_train/margins": -0.12917862832546234, + "rewards_train/rejected": -0.1910697966814041, + "step": 261 + }, + { + "epoch": 0.07, + "learning_rate": 1.983624491956043e-06, + "loss": 0.5432, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -8.630305290222168, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -10.538702011108398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013030529022216797, + "rewards_train/margins": 0.07833967357873917, + "rewards_train/rejected": -0.09137020260095596, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -67.61695861816406, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -133.30087280273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3616958558559418, + "rewards_train/margins": 0.16839143633842468, + "rewards_train/rejected": -0.5300872921943665, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.948873996734619, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -4.237276077270508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018324900418519974, + "rewards_train/margins": -0.03209729306399822, + "rewards_train/rejected": 0.013772392645478249, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -68.81234741210938, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -163.08108520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0312347412109375, + "rewards_train/margins": 1.176873803138733, + "rewards_train/rejected": -1.2081085443496704, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -16.565570831298828, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -5.87881326675415, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00655708322301507, + "rewards_train/margins": 0.13757424941286445, + "rewards_train/rejected": -0.14413133263587952, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -76.99809265136719, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -99.78317260742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3998092710971832, + "rewards_train/margins": 1.378508061170578, + "rewards_train/rejected": -1.7783173322677612, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -15.16942310333252, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -9.6598482131958, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04819231107831001, + "rewards_train/margins": -0.025957489386200905, + "rewards_train/rejected": -0.022234821692109108, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -10.729619979858398, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -3.1242330074310303, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11671199649572372, + "rewards_train/margins": -0.1292886957526207, + "rewards_train/rejected": 0.012576699256896973, + "step": 262 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.4193458557128906, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -8.055329322814941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03150291368365288, + "rewards_train/margins": 0.09016084671020508, + "rewards_train/rejected": -0.0586579330265522, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -13.26656723022461, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -14.707979202270508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.039156723767519, + "rewards_train/margins": 0.056641194969415665, + "rewards_train/rejected": -0.09579791873693466, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -92.11530303955078, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -128.97726440429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.061530303210020065, + "rewards_train/margins": -0.3638038747012615, + "rewards_train/rejected": 0.30227357149124146, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -74.11937713623047, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -62.33197021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2619377076625824, + "rewards_train/margins": 0.2962593138217926, + "rewards_train/rejected": -0.558197021484375, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.049148559570312, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -11.356354713439941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05133514478802681, + "rewards_train/margins": 0.2244706191122532, + "rewards_train/rejected": -0.17313547432422638, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -17.442707061767578, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -27.444297790527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00677070626989007, + "rewards_train/margins": -0.037340928334742785, + "rewards_train/rejected": 0.030570222064852715, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -103.52963256835938, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -170.49586486816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25296327471733093, + "rewards_train/margins": 0.7966232597827911, + "rewards_train/rejected": -1.049586534500122, + "step": 263 + }, + { + "epoch": 0.07, + "logps_train/chosen": -50.673973083496094, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -57.60809326171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16739730536937714, + "rewards_train/margins": -0.40658798813819885, + "rewards_train/rejected": 0.23919068276882172, + "step": 263 + }, + { + "epoch": 0.07, + "learning_rate": 1.983144242520088e-06, + "loss": 0.6196, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -23.923221588134766, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -13.581788063049316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10482215881347656, + "rewards_train/margins": -0.13414335250854492, + "rewards_train/rejected": 0.02932119369506836, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -9.271554946899414, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -6.148869514465332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08534450829029083, + "rewards_train/margins": 0.056481460109353065, + "rewards_train/rejected": 0.028863048180937767, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -2.4221742153167725, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -16.53572654724121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0030950785148888826, + "rewards_train/margins": 0.03166773286648095, + "rewards_train/rejected": -0.028572654351592064, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.18020248413086, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -4.5439581871032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006979751866310835, + "rewards_train/margins": 0.07075057504698634, + "rewards_train/rejected": -0.0637708231806755, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -42.53739929199219, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -4.409458160400391, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04626007005572319, + "rewards_train/margins": 0.13876838609576225, + "rewards_train/rejected": -0.09250831604003906, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -197.83914184570312, + "logps_train/ref_chosen": -195.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -138.64694213867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.283914178609848, + "rewards_train/margins": -0.3192199654877186, + "rewards_train/rejected": 0.03530578687787056, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -3.18634033203125, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -12.236312866210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02355346642434597, + "rewards_train/margins": -0.00906524620950222, + "rewards_train/rejected": 0.03261871263384819, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -22.804201126098633, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -19.172893524169922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15542010962963104, + "rewards_train/margins": -0.17563075758516788, + "rewards_train/rejected": 0.020210647955536842, + "step": 264 + }, + { + "epoch": 0.07, + "logps_train/chosen": -116.51431274414062, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -184.00827026367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.351431280374527, + "rewards_train/margins": 1.6493957936763763, + "rewards_train/rejected": -2.0008270740509033, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -61.49019241333008, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -61.62480545043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05098075792193413, + "rewards_train/margins": 0.01346130296587944, + "rewards_train/rejected": 0.03751945495605469, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -94.60932922363281, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -142.43873596191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3390670716762543, + "rewards_train/margins": 1.7829407155513763, + "rewards_train/rejected": -1.443873643875122, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -71.77397155761719, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -84.97107696533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1726028472185135, + "rewards_train/margins": 0.06971054524183273, + "rewards_train/rejected": 0.10289230197668076, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -134.16944885253906, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -139.90170288085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6169449090957642, + "rewards_train/margins": 0.17322540283203125, + "rewards_train/rejected": -0.7901703119277954, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.0176503658294678, + "logps_train/ref_chosen": -1.234375, + "logps_train/ref_rejected": -0.322265625, + "logps_train/rejected": -0.517893373966217, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021672463044524193, + "rewards_train/margins": 0.04123523831367493, + "rewards_train/rejected": -0.019562775269150734, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -1.7462846040725708, + "logps_train/ref_chosen": -1.703125, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -14.693563461303711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0043159606866538525, + "rewards_train/margins": -0.05995961604639888, + "rewards_train/rejected": 0.055643655359745026, + "step": 265 + }, + { + "epoch": 0.07, + "logps_train/chosen": -66.8798828125, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -128.34703063964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31201171875, + "rewards_train/margins": 0.14671477675437927, + "rewards_train/rejected": 0.16529694199562073, + "step": 265 + }, + { + "epoch": 0.07, + "learning_rate": 1.982657112116805e-06, + "loss": 0.6278, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -78.71060180664062, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -144.487548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12106018513441086, + "rewards_train/margins": 1.1276946738362312, + "rewards_train/rejected": -1.248754858970642, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -145.1920166015625, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -152.4174346923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.61920166015625, + "rewards_train/margins": 0.6225417852401733, + "rewards_train/rejected": -1.2417434453964233, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -93.12628173828125, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -153.2503662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.087371826171875, + "rewards_train/margins": 0.9124084711074829, + "rewards_train/rejected": -0.8250366449356079, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -118.65219116210938, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -127.7791748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2652190923690796, + "rewards_train/margins": 0.21269845962524414, + "rewards_train/rejected": -1.4779175519943237, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -83.24354553222656, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -119.26487731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3743545711040497, + "rewards_train/margins": 1.1521331369876862, + "rewards_train/rejected": -1.5264877080917358, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -20.707983016967773, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -26.293317794799805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020798301324248314, + "rewards_train/margins": 0.24603348411619663, + "rewards_train/rejected": -0.26683178544044495, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -14.07295036315918, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -5.34604024887085, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06354504078626633, + "rewards_train/margins": 0.08043398708105087, + "rewards_train/rejected": -0.1439790278673172, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -128.97857666015625, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -134.28672790527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4978576600551605, + "rewards_train/margins": -0.1691848635673523, + "rewards_train/rejected": -0.3286727964878082, + "step": 266 + }, + { + "epoch": 0.07, + "logps_train/chosen": -4.76756477355957, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -4.454269886016846, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04824352264404297, + "rewards_train/margins": -0.04382949322462082, + "rewards_train/rejected": 0.09207301586866379, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -130.357421875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -128.0524139404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.935742199420929, + "rewards_train/margins": 0.7194991707801819, + "rewards_train/rejected": -1.6552413702011108, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -16.982769012451172, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -4.522425651550293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010776901617646217, + "rewards_train/margins": 0.01021566428244114, + "rewards_train/rejected": -0.020992565900087357, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -74.87452697753906, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -149.31964111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28745269775390625, + "rewards_train/margins": 1.1445114612579346, + "rewards_train/rejected": -1.4319641590118408, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -76.70329284667969, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -141.84523010253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02967071533203125, + "rewards_train/margins": 1.7141937017440796, + "rewards_train/rejected": -1.6845229864120483, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -91.24864959716797, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -117.1452865600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2751350402832031, + "rewards_train/margins": 1.9396637678146362, + "rewards_train/rejected": -1.664528727531433, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.858168601989746, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -7.360316276550293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01706686057150364, + "rewards_train/margins": 0.09708976559340954, + "rewards_train/rejected": -0.11415662616491318, + "step": 267 + }, + { + "epoch": 0.07, + "logps_train/chosen": -116.43650817871094, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -57.80225372314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10634918510913849, + "rewards_train/margins": 0.13657455705106258, + "rewards_train/rejected": -0.030225371941924095, + "step": 267 + }, + { + "epoch": 0.07, + "learning_rate": 1.98216310415559e-06, + "loss": 0.4736, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.458928108215332, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -12.636844635009766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03339281305670738, + "rewards_train/margins": -0.01345834881067276, + "rewards_train/rejected": -0.019934464246034622, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -105.10810852050781, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -104.93505859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03918914869427681, + "rewards_train/margins": -0.01730499416589737, + "rewards_train/rejected": 0.05649414286017418, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -15.488972663879395, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -14.976335525512695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05514726787805557, + "rewards_train/margins": -0.02626371569931507, + "rewards_train/rejected": -0.0288835521787405, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -5.341658115386963, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -12.044048309326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015415811911225319, + "rewards_train/margins": 0.07648902200162411, + "rewards_train/rejected": -0.09190483391284943, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -5.033823013305664, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -13.251583099365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018492698669433594, + "rewards_train/margins": 0.06240100786089897, + "rewards_train/rejected": -0.04390830919146538, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -54.4461669921875, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -25.426776885986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19461670517921448, + "rewards_train/margins": 0.010560989379882812, + "rewards_train/rejected": -0.2051776945590973, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -0.665019690990448, + "logps_train/ref_chosen": -0.79296875, + "logps_train/ref_rejected": -0.79296875, + "logps_train/rejected": -0.6652989387512207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01279490627348423, + "rewards_train/margins": 2.7924776077270508e-05, + "rewards_train/rejected": 0.01276698149740696, + "step": 268 + }, + { + "epoch": 0.07, + "logps_train/chosen": -11.29223346710205, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -14.344375610351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06452665477991104, + "rewards_train/margins": 0.04896421544253826, + "rewards_train/rejected": 0.01556243933737278, + "step": 268 + }, + { + "epoch": 0.08, + "logps_train/chosen": -2.4085893630981445, + "logps_train/ref_chosen": -2.71875, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -5.160533905029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.031016064807772636, + "rewards_train/margins": 0.059569455683231354, + "rewards_train/rejected": -0.028553390875458717, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -99.73674011230469, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -135.74142456054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12367401272058487, + "rewards_train/margins": 1.2004684433341026, + "rewards_train/rejected": -1.3241424560546875, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -147.79933166503906, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -146.09780883789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9799332022666931, + "rewards_train/margins": -0.4701523184776306, + "rewards_train/rejected": -0.5097808837890625, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.420322060585022, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -8.447059631347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0037509561516344547, + "rewards_train/margins": 0.08470501145347953, + "rewards_train/rejected": -0.08845596760511398, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -211.049560546875, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -212.5876007080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3049561977386475, + "rewards_train/margins": 0.05380392074584961, + "rewards_train/rejected": -2.358760118484497, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -65.08246612548828, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -75.2643814086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3082466125488281, + "rewards_train/margins": 0.26819151639938354, + "rewards_train/rejected": -0.5764381289482117, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.051237106323242, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -4.321802139282227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03237628936767578, + "rewards_train/margins": 0.0333065033191815, + "rewards_train/rejected": -0.0009302139515057206, + "step": 269 + }, + { + "epoch": 0.08, + "logps_train/chosen": -88.07178497314453, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -83.98065185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15717850625514984, + "rewards_train/margins": 1.090886726975441, + "rewards_train/rejected": -1.2480652332305908, + "step": 269 + }, + { + "epoch": 0.08, + "learning_rate": 1.981662222093976e-06, + "loss": 0.6382, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -109.21963500976562, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -177.12562561035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1219635009765625, + "rewards_train/margins": 1.9905991554260254, + "rewards_train/rejected": -2.112562656402588, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -148.98019409179688, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -177.5338897705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8980194330215454, + "rewards_train/margins": 0.5553696155548096, + "rewards_train/rejected": -1.453389048576355, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -156.24636840820312, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -208.55569458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0246368646621704, + "rewards_train/margins": 0.13093256950378418, + "rewards_train/rejected": -1.1555694341659546, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.2857184410095215, + "logps_train/ref_chosen": -0.283203125, + "logps_train/ref_rejected": -0.283203125, + "logps_train/rejected": -0.27195489406585693, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00025153160095214844, + "rewards_train/margins": -0.0013763547176495194, + "rewards_train/rejected": 0.001124823116697371, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -19.909225463867188, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -6.600524425506592, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.159077450633049, + "rewards_train/margins": 0.3003799021244049, + "rewards_train/rejected": -0.1413024514913559, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -130.52859497070312, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -140.8446044921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.452859491109848, + "rewards_train/margins": -0.16839903593063354, + "rewards_train/rejected": -0.2844604551792145, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.3245785236358643, + "logps_train/ref_chosen": -1.234375, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -8.405452728271484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009020352736115456, + "rewards_train/margins": -0.05597508139908314, + "rewards_train/rejected": 0.04695472866296768, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -27.27855682373047, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -8.660707473754883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22214432060718536, + "rewards_train/margins": 0.28196506947278976, + "rewards_train/rejected": -0.0598207488656044, + "step": 270 + }, + { + "epoch": 0.08, + "logps_train/chosen": -2.7315938472747803, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -1.9375, + "logps_train/rejected": -2.8666884899139404, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07940938323736191, + "rewards_train/margins": 0.013509467244148254, + "rewards_train/rejected": -0.09291885048151016, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -45.801849365234375, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -24.641704559326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03018493764102459, + "rewards_train/margins": 0.3089855182915926, + "rewards_train/rejected": -0.3391704559326172, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -42.22585678100586, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -56.296119689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24758568406105042, + "rewards_train/margins": 0.3570263087749481, + "rewards_train/rejected": -0.6046119928359985, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -124.77354431152344, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -131.6844024658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3773544430732727, + "rewards_train/margins": 0.9910858273506165, + "rewards_train/rejected": -1.3684402704238892, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -28.97464942932129, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -7.832087516784668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24746493995189667, + "rewards_train/margins": 0.06386880576610565, + "rewards_train/rejected": -0.3113337457180023, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.9810739159584045, + "logps_train/ref_chosen": -1.03125, + "logps_train/ref_rejected": -1.515625, + "logps_train/rejected": -1.6809412240982056, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005017608404159546, + "rewards_train/margins": 0.021549230441451073, + "rewards_train/rejected": -0.016531622037291527, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -136.5753936767578, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -204.73696899414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34246063232421875, + "rewards_train/margins": 1.816157579421997, + "rewards_train/rejected": -1.4736969470977783, + "step": 271 + }, + { + "epoch": 0.08, + "logps_train/chosen": -16.393692016601562, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -7.863215923309326, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09813079982995987, + "rewards_train/margins": 0.17195239663124084, + "rewards_train/rejected": -0.07382159680128098, + "step": 271 + }, + { + "epoch": 0.08, + "learning_rate": 1.9811544694376053e-06, + "loss": 0.5446, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -18.551542282104492, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -16.98528480529785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1176542267203331, + "rewards_train/margins": -0.15662574768066406, + "rewards_train/rejected": 0.03897152096033096, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -7.139189720153809, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -9.718090057373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02045602910220623, + "rewards_train/margins": 0.08601503632962704, + "rewards_train/rejected": -0.0655590072274208, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.133063316345215, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -66.5283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09294366836547852, + "rewards_train/margins": 0.09577569970861077, + "rewards_train/rejected": -0.0028320313431322575, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -101.17658996582031, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -101.68030548095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03234100341796875, + "rewards_train/margins": 0.050371551886200905, + "rewards_train/rejected": -0.018030548468232155, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -119.45118713378906, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -119.08904266357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6451187133789062, + "rewards_train/margins": 0.41378557682037354, + "rewards_train/rejected": -1.0589042901992798, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -64.60946655273438, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -164.4014892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13594666123390198, + "rewards_train/margins": 1.4042023122310638, + "rewards_train/rejected": -1.5401489734649658, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -108.7335433959961, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -97.98307037353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5733543634414673, + "rewards_train/margins": 0.024952709674835205, + "rewards_train/rejected": -0.5983070731163025, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -31.98322868347168, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -15.69184398651123, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0016771316295489669, + "rewards_train/margins": 0.158361530280672, + "rewards_train/rejected": -0.15668439865112305, + "step": 272 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.20557144284248352, + "logps_train/ref_chosen": -0.23046875, + "logps_train/ref_rejected": -0.23046875, + "logps_train/rejected": -0.20485781133174896, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002489730715751648, + "rewards_train/margins": -7.1363290771842e-05, + "rewards_train/rejected": 0.00256109400652349, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.558300793170929, + "logps_train/ref_chosen": -0.66015625, + "logps_train/ref_rejected": -0.66015625, + "logps_train/rejected": -0.5637972950935364, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01018554624170065, + "rewards_train/margins": 0.000549650751054287, + "rewards_train/rejected": 0.009635895490646362, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -41.27983093261719, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -34.11467361450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.202983096241951, + "rewards_train/margins": 0.1209842711687088, + "rewards_train/rejected": -0.3239673674106598, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -24.398971557617188, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -34.45868682861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3023971617221832, + "rewards_train/margins": 0.24347153306007385, + "rewards_train/rejected": -0.5458686947822571, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -28.831600189208984, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -28.243154525756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05816001817584038, + "rewards_train/margins": 0.10365543141961098, + "rewards_train/rejected": -0.16181544959545135, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -105.63491821289062, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -110.1714096069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5634918212890625, + "rewards_train/margins": 1.253649115562439, + "rewards_train/rejected": -1.8171409368515015, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -139.84461975097656, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -115.173095703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4844619929790497, + "rewards_train/margins": -0.3671524226665497, + "rewards_train/rejected": -0.1173095703125, + "step": 273 + }, + { + "epoch": 0.08, + "logps_train/chosen": -177.43576049804688, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -200.54367065429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2564239501953125, + "rewards_train/margins": 2.010791063308716, + "rewards_train/rejected": -1.7543671131134033, + "step": 273 + }, + { + "epoch": 0.08, + "learning_rate": 1.9806398497402087e-06, + "loss": 0.5803, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -102.01576232910156, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -102.0596923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05157623440027237, + "rewards_train/margins": 0.004393003880977631, + "rewards_train/rejected": -0.05596923828125, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.9592357277870178, + "logps_train/ref_chosen": -0.98828125, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -5.950026035308838, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0029045522678643465, + "rewards_train/margins": -0.048967846436426044, + "rewards_train/rejected": 0.05187239870429039, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -95.39460754394531, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -91.6891860961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2894607484340668, + "rewards_train/margins": 0.37945786118507385, + "rewards_train/rejected": -0.6689186096191406, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -12.742155075073242, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -3.46875, + "logps_train/rejected": -4.196207523345947, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02578449249267578, + "rewards_train/margins": 0.09853024780750275, + "rewards_train/rejected": -0.07274575531482697, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -5.07003116607666, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -18.71164321899414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0007531166193075478, + "rewards_train/margins": -0.054588795464951545, + "rewards_train/rejected": 0.053835678845644, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -150.5147705078125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -142.10623168945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.751477062702179, + "rewards_train/margins": -0.540853887796402, + "rewards_train/rejected": -0.21062317490577698, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -13.35091781616211, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -10.899460792541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.033658217638731, + "rewards_train/margins": 0.0736042968928814, + "rewards_train/rejected": -0.03994607925415039, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.46360969543457, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -8.000797271728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11613903194665909, + "rewards_train/margins": 0.18809375911951065, + "rewards_train/rejected": -0.07195472717285156, + "step": 274 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.16548395156860352, + "logps_train/ref_chosen": -0.2314453125, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -5.3327531814575195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006596136372536421, + "rewards_train/margins": 0.1508089634589851, + "rewards_train/rejected": -0.14421282708644867, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -120.32717895507812, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -108.37892150878906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9327179193496704, + "rewards_train/margins": -0.19482576847076416, + "rewards_train/rejected": -0.7378921508789062, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -24.079315185546875, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -10.953577041625977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09206848591566086, + "rewards_train/margins": 0.12492619082331657, + "rewards_train/rejected": -0.032857704907655716, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -72.72551727294922, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -74.6071548461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.022551728412508965, + "rewards_train/margins": 0.6381637919694185, + "rewards_train/rejected": -0.6607155203819275, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -71.69583129882812, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -116.14109802246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.269583135843277, + "rewards_train/margins": 1.0445266664028168, + "rewards_train/rejected": -1.3141098022460938, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -118.0843505859375, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -76.61395263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9084351062774658, + "rewards_train/margins": -0.747039794921875, + "rewards_train/rejected": -1.1613953113555908, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -43.11627960205078, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -24.675518035888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08837204426527023, + "rewards_train/margins": 0.24342385679483414, + "rewards_train/rejected": -0.1550518125295639, + "step": 275 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.4497308731079102, + "logps_train/ref_chosen": -1.46875, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -5.68446159362793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.001901912735775113, + "rewards_train/margins": 0.08597307209856808, + "rewards_train/rejected": -0.08407115936279297, + "step": 275 + }, + { + "epoch": 0.08, + "learning_rate": 1.9801183666035786e-06, + "loss": 0.6685, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -10.524060249328613, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -7.130228042602539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07884397357702255, + "rewards_train/margins": 0.18561677634716034, + "rewards_train/rejected": -0.10677280277013779, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -130.98904418945312, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -132.08287048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2010955810546875, + "rewards_train/margins": 0.009382620453834534, + "rewards_train/rejected": 0.19171296060085297, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -18.0537109375, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -7.773425579071045, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06962890923023224, + "rewards_train/margins": 0.07197146723046899, + "rewards_train/rejected": -0.0023425580002367496, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.416440963745117, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -19.042953491210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03960590437054634, + "rewards_train/margins": 0.04390125349164009, + "rewards_train/rejected": -0.00429534912109375, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -33.16196823120117, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -11.631189346313477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18380318582057953, + "rewards_train/margins": 0.2906721234321594, + "rewards_train/rejected": -0.1068689376115799, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -136.4923858642578, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -191.03189086914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24923859536647797, + "rewards_train/margins": 0.8539505153894424, + "rewards_train/rejected": -1.1031891107559204, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -80.68142700195312, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -69.32981872558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13185730576515198, + "rewards_train/margins": 0.06483917683362961, + "rewards_train/rejected": 0.06701812893152237, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.177757740020752, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -25.232664108276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08340077847242355, + "rewards_train/margins": 0.1648656353354454, + "rewards_train/rejected": -0.24826641380786896, + "step": 276 + }, + { + "epoch": 0.08, + "logps_train/chosen": -165.064697265625, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -153.00576782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.806469738483429, + "rewards_train/margins": 0.9941070675849915, + "rewards_train/rejected": -1.8005768060684204, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -201.3755340576172, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -144.00845336914062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2375534772872925, + "rewards_train/margins": -0.13670814037322998, + "rewards_train/rejected": -1.1008453369140625, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -7.878509521484375, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -7.356106758117676, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0933990478515625, + "rewards_train/margins": 0.17588472366333008, + "rewards_train/rejected": -0.08248567581176758, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -89.30949401855469, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -114.58744812011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08094940334558487, + "rewards_train/margins": 0.37779542058706284, + "rewards_train/rejected": -0.4587448239326477, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -4.032447814941406, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -0.9375, + "logps_train/rejected": -0.9039647579193115, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.043869782239198685, + "rewards_train/margins": -0.04722330649383366, + "rewards_train/rejected": 0.0033535242546349764, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -140.44674682617188, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -138.46942138671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0446746349334717, + "rewards_train/margins": -1.5977324843406677, + "rewards_train/rejected": -0.44694215059280396, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -99.62272644042969, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -114.21224975585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2622726559638977, + "rewards_train/margins": 1.0089523196220398, + "rewards_train/rejected": -1.2712249755859375, + "step": 277 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.223615884780884, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -2.171875, + "logps_train/rejected": -2.3258979320526123, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013575911521911621, + "rewards_train/margins": 0.028978205285966396, + "rewards_train/rejected": -0.015402293764054775, + "step": 277 + }, + { + "epoch": 0.08, + "learning_rate": 1.9795900236775438e-06, + "loss": 0.6566, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -151.49903869628906, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -205.8980712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05009613186120987, + "rewards_train/margins": 0.7399032488465309, + "rewards_train/rejected": -0.689807116985321, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -106.09422302246094, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -101.25564575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8094223141670227, + "rewards_train/margins": 0.7661423087120056, + "rewards_train/rejected": -1.5755646228790283, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.4341413974761963, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -6.575303077697754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05278914049267769, + "rewards_train/margins": 0.05161616578698158, + "rewards_train/rejected": -0.10440530627965927, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -159.11656188964844, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -148.34759521484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.411656141281128, + "rewards_train/margins": -0.37689661979675293, + "rewards_train/rejected": -2.034759521484375, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -21.01983642578125, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -17.491825103759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07301636040210724, + "rewards_train/margins": 0.3596988767385483, + "rewards_train/rejected": -0.28668251633644104, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.805379867553711, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -5.544310092926025, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0007120132795535028, + "rewards_train/margins": -0.026106977427843958, + "rewards_train/rejected": 0.02681899070739746, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.40818977355957, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -12.657672882080078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.015431023202836514, + "rewards_train/margins": -0.012551688589155674, + "rewards_train/rejected": 0.027982711791992188, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -2.5414469242095947, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -2.500471591949463, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03851969167590141, + "rewards_train/margins": -0.027535032480955124, + "rewards_train/rejected": -0.010984659194946289, + "step": 278 + }, + { + "epoch": 0.08, + "logps_train/chosen": -186.36569213867188, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -193.84561157226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.336569219827652, + "rewards_train/margins": 0.24799194931983948, + "rewards_train/rejected": -0.5845611691474915, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -89.04820251464844, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -115.23727416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5548202395439148, + "rewards_train/margins": 1.3189072012901306, + "rewards_train/rejected": -1.8737274408340454, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -131.47718811035156, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -142.34193420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04771881178021431, + "rewards_train/margins": 1.9364746324717999, + "rewards_train/rejected": -1.9841934442520142, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -24.926103591918945, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -10.348651885986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007389640901237726, + "rewards_train/margins": 0.06100483098998666, + "rewards_train/rejected": -0.05361519008874893, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -24.048154830932617, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -2.5625, + "logps_train/rejected": -3.370530128479004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13268451392650604, + "rewards_train/margins": 0.21348752826452255, + "rewards_train/rejected": -0.08080301433801651, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -79.16499328613281, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -103.89715576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2664993405342102, + "rewards_train/margins": 0.42321622371673584, + "rewards_train/rejected": -0.689715564250946, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -34.815879821777344, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -8.726550102233887, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15658798813819885, + "rewards_train/margins": -0.07455797493457794, + "rewards_train/rejected": -0.08203001320362091, + "step": 279 + }, + { + "epoch": 0.08, + "logps_train/chosen": -97.34925842285156, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -146.94032287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.184925839304924, + "rewards_train/margins": 1.9091064482927322, + "rewards_train/rejected": -2.0940322875976562, + "step": 279 + }, + { + "epoch": 0.08, + "learning_rate": 1.9790548246599444e-06, + "loss": 0.5349, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -10.676961898803711, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -7.388883113861084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11769618839025497, + "rewards_train/margins": 0.039942122995853424, + "rewards_train/rejected": -0.1576383113861084, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.4748636782169342, + "logps_train/ref_chosen": -0.4765625, + "logps_train/ref_rejected": -0.4765625, + "logps_train/rejected": -0.47993820905685425, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00016988218703772873, + "rewards_train/margins": 0.0005074530927231535, + "rewards_train/rejected": -0.0003375709056854248, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -58.67656326293945, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -120.73554229736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1426563262939453, + "rewards_train/margins": 0.18089792132377625, + "rewards_train/rejected": -0.32355424761772156, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.349599838256836, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -7.128113746643066, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003709983779117465, + "rewards_train/margins": -0.012773609487339854, + "rewards_train/rejected": 0.00906362570822239, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -75.13921356201172, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -91.38875579833984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3639213740825653, + "rewards_train/margins": 0.6249541938304901, + "rewards_train/rejected": -0.9888755679130554, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -49.716026306152344, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -17.667753219604492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.046602632850408554, + "rewards_train/margins": 0.20142269507050514, + "rewards_train/rejected": -0.2480253279209137, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -16.560544967651367, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -24.033784866333008, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.14394550025463104, + "rewards_train/margins": -0.1151760071516037, + "rewards_train/rejected": 0.25912150740623474, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -7.798762321472168, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -6.623291015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0767512321472168, + "rewards_train/margins": -0.04567212983965874, + "rewards_train/rejected": -0.03107910230755806, + "step": 280 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.501181602478027, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -16.940082550048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.025118160992860794, + "rewards_train/margins": 0.05639009550213814, + "rewards_train/rejected": -0.08150825649499893, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -13.025734901428223, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -58.50343322753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00992651004344225, + "rewards_train/margins": 0.31026984471827745, + "rewards_train/rejected": -0.3003433346748352, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.227418899536133, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -26.973628997802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02586689032614231, + "rewards_train/margins": 0.10899600945413113, + "rewards_train/rejected": -0.13486289978027344, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -12.921588897705078, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -2.6491341590881348, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007841110229492188, + "rewards_train/margins": -0.035057973116636276, + "rewards_train/rejected": 0.042899083346128464, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.1434309482574463, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -12.866294860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02003190480172634, + "rewards_train/margins": 0.1566613968461752, + "rewards_train/rejected": -0.13662949204444885, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.126575469970703, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -35.69845962524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03734245523810387, + "rewards_train/margins": 0.15718841925263405, + "rewards_train/rejected": -0.11984596401453018, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -95.19929504394531, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -143.4503173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7199295163154602, + "rewards_train/margins": 1.4751023650169373, + "rewards_train/rejected": -2.1950318813323975, + "step": 281 + }, + { + "epoch": 0.08, + "logps_train/chosen": -15.41049861907959, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -20.488306045532227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09104986488819122, + "rewards_train/margins": 0.2202807515859604, + "rewards_train/rejected": -0.3113306164741516, + "step": 281 + }, + { + "epoch": 0.08, + "learning_rate": 1.9785127732966066e-06, + "loss": 0.6105, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -43.076438903808594, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -58.47499465942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08264388889074326, + "rewards_train/margins": 0.6148555651307106, + "rewards_train/rejected": -0.6974994540214539, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -91.81327819824219, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -88.22489166259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5313278436660767, + "rewards_train/margins": -0.008838653564453125, + "rewards_train/rejected": -0.5224891901016235, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.383242607116699, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -21.037906646728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019574260339140892, + "rewards_train/margins": 0.12171640433371067, + "rewards_train/rejected": -0.14129066467285156, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -99.85820007324219, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -138.01455688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3358200192451477, + "rewards_train/margins": 2.315635621547699, + "rewards_train/rejected": -2.6514556407928467, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -4.385209083557129, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -7.512001037597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06147909164428711, + "rewards_train/margins": 0.23767919838428497, + "rewards_train/rejected": -0.17620010673999786, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -25.51629638671875, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -11.724720001220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0016296387184411287, + "rewards_train/margins": 0.13959236140362918, + "rewards_train/rejected": -0.1412220001220703, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.6272616982460022, + "logps_train/ref_chosen": -0.87109375, + "logps_train/ref_rejected": -1.2890625, + "logps_train/rejected": -3.016979217529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02438320592045784, + "rewards_train/margins": 0.19717488065361977, + "rewards_train/rejected": -0.17279167473316193, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -5.602389812469482, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -33.919395446777344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03351102024316788, + "rewards_train/margins": -0.07454943656921387, + "rewards_train/rejected": 0.10806045681238174, + "step": 282 + }, + { + "epoch": 0.08, + "logps_train/chosen": -142.69000244140625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -163.0115509033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.069000244140625, + "rewards_train/margins": 0.13215485215187073, + "rewards_train/rejected": -0.20115509629249573, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -34.96644592285156, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -48.55961608886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2216445952653885, + "rewards_train/margins": 0.2593170255422592, + "rewards_train/rejected": -0.4809616208076477, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -11.427460670471191, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -2.484375, + "logps_train/rejected": -7.061789512634277, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04274606704711914, + "rewards_train/margins": 0.414995402097702, + "rewards_train/rejected": -0.45774146914482117, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.397294998168945, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -16.888578414916992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0209794994443655, + "rewards_train/margins": 0.042878346517682076, + "rewards_train/rejected": -0.06385784596204758, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -178.95132446289062, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -123.56134033203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.09513258934021, + "rewards_train/margins": -0.38899850845336914, + "rewards_train/rejected": -1.7061340808868408, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -133.30445861816406, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -150.02037048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8304458856582642, + "rewards_train/margins": 1.4215911626815796, + "rewards_train/rejected": -2.2520370483398438, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -151.03936767578125, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -177.39427185058594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20393677055835724, + "rewards_train/margins": -0.06450958549976349, + "rewards_train/rejected": -0.13942718505859375, + "step": 283 + }, + { + "epoch": 0.08, + "logps_train/chosen": -4.805668830871582, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -4.837498664855957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12431688606739044, + "rewards_train/margins": -0.08744201809167862, + "rewards_train/rejected": -0.03687486797571182, + "step": 283 + }, + { + "epoch": 0.08, + "learning_rate": 1.9779638733813153e-06, + "loss": 0.585, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -5.900140762329102, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -9.253205299377441, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.030639076605439186, + "rewards_train/margins": -0.049068547785282135, + "rewards_train/rejected": 0.01842947117984295, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -160.15234375, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -82.92241668701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.115234375, + "rewards_train/margins": -0.17299270629882812, + "rewards_train/rejected": 0.057758331298828125, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -123.90071868896484, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -164.81649780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.040071871131658554, + "rewards_train/margins": 0.041577909141778946, + "rewards_train/rejected": -0.0816497802734375, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.3214362859725952, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -6.548769950866699, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009487378410995007, + "rewards_train/margins": -0.010860383394174278, + "rewards_train/rejected": 0.0013730049831792712, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -15.47274112701416, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -11.701610565185547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.041024114936590195, + "rewards_train/margins": -0.014613058418035507, + "rewards_train/rejected": -0.026411056518554688, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -88.84019470214844, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -144.40914916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0340194702148438, + "rewards_train/margins": 1.2068955898284912, + "rewards_train/rejected": -2.240915060043335, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -148.83377075195312, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -183.04412841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9833770990371704, + "rewards_train/margins": 1.3210357427597046, + "rewards_train/rejected": -2.304412841796875, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -22.547544479370117, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -5.483857154846191, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07975444942712784, + "rewards_train/margins": 0.10456877201795578, + "rewards_train/rejected": -0.18432322144508362, + "step": 284 + }, + { + "epoch": 0.08, + "logps_train/chosen": -5.424286842346191, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -4.1400299072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0330536849796772, + "rewards_train/margins": 0.00751180574297905, + "rewards_train/rejected": -0.04056549072265625, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -39.941192626953125, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -56.60148620605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3558807373046875, + "rewards_train/margins": 0.9410293698310852, + "rewards_train/rejected": -0.5851486325263977, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.565535545349121, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -2.6448168754577637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10594644397497177, + "rewards_train/margins": 0.12667813152074814, + "rewards_train/rejected": -0.020731687545776367, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -125.39105987548828, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -157.03131103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2391059398651123, + "rewards_train/margins": 0.46402525901794434, + "rewards_train/rejected": -2.7031311988830566, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -4.922508239746094, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -6.52988862991333, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.039125826209783554, + "rewards_train/margins": 0.08886303380131721, + "rewards_train/rejected": -0.12798886001110077, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -116.06745910644531, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -207.4754180908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7067459225654602, + "rewards_train/margins": 2.140795886516571, + "rewards_train/rejected": -2.8475418090820312, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -130.25828552246094, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -91.6172103881836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5258285403251648, + "rewards_train/margins": 0.035892486572265625, + "rewards_train/rejected": -0.5617210268974304, + "step": 285 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.265113830566406, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -13.643457412719727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.048386383801698685, + "rewards_train/margins": 0.06595936045050621, + "rewards_train/rejected": -0.1143457442522049, + "step": 285 + }, + { + "epoch": 0.08, + "learning_rate": 1.977408128755787e-06, + "loss": 0.5593, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -137.04296875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -107.87362670898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.604296863079071, + "rewards_train/margins": -0.16693419218063354, + "rewards_train/rejected": -0.4373626708984375, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -22.480072021484375, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -3.6874794960021973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05199280008673668, + "rewards_train/margins": 0.09886575117707253, + "rewards_train/rejected": -0.046872951090335846, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -16.5860595703125, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -27.197052001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07889404147863388, + "rewards_train/margins": 0.13609924167394638, + "rewards_train/rejected": -0.0572052001953125, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.301154136657715, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -20.29844093322754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.032384585589170456, + "rewards_train/margins": 0.16222868487238884, + "rewards_train/rejected": -0.12984409928321838, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -80.00495910644531, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -158.37246704101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09950409084558487, + "rewards_train/margins": 2.1367507949471474, + "rewards_train/rejected": -2.0372467041015625, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.014760971069336, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -8.019204139709473, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017273902893066406, + "rewards_train/margins": 0.07544431835412979, + "rewards_train/rejected": -0.058170415461063385, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.0277243852615356, + "logps_train/ref_chosen": -0.953125, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -4.685791492462158, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0074599385261535645, + "rewards_train/margins": 0.09861921519041061, + "rewards_train/rejected": -0.10607915371656418, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -31.852779388427734, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -15.216874122619629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03972206264734268, + "rewards_train/margins": 0.18640948086977005, + "rewards_train/rejected": -0.14668741822242737, + "step": 286 + }, + { + "epoch": 0.08, + "logps_train/chosen": -248.3878173828125, + "logps_train/ref_chosen": -241.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -25.097583770751953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.738781750202179, + "rewards_train/margins": -0.5290233641862869, + "rewards_train/rejected": -0.20975838601589203, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.9476252198219299, + "logps_train/ref_chosen": -0.890625, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -2.224759340286255, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005700021982192993, + "rewards_train/margins": -0.008224088000133634, + "rewards_train/rejected": 0.0025240660179406404, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -95.53669738769531, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -82.28143310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.196330264210701, + "rewards_train/margins": 0.07447357475757599, + "rewards_train/rejected": 0.121856689453125, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -67.90559387207031, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -88.50627899169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3405593931674957, + "rewards_train/margins": 0.7600685060024261, + "rewards_train/rejected": -1.1006278991699219, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -116.20014953613281, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -138.59036254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7200149297714233, + "rewards_train/margins": 0.8390213251113892, + "rewards_train/rejected": -2.5590362548828125, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.7235180139541626, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -5.375662803649902, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03077319823205471, + "rewards_train/margins": 0.024589478503912687, + "rewards_train/rejected": 0.006183719728142023, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -126.32901000976562, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -122.04525756835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8829010128974915, + "rewards_train/margins": -0.37837523221969604, + "rewards_train/rejected": -0.5045257806777954, + "step": 287 + }, + { + "epoch": 0.08, + "logps_train/chosen": -97.64322662353516, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -130.74859619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46432265639305115, + "rewards_train/margins": 1.910536915063858, + "rewards_train/rejected": -2.374859571456909, + "step": 287 + }, + { + "epoch": 0.08, + "learning_rate": 1.9768455433096456e-06, + "loss": 0.5934, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.619009971618652, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -3.5450870990753174, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.050599005073308945, + "rewards_train/margins": 0.05979521479457617, + "rewards_train/rejected": -0.009196209721267223, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -190.68954467773438, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -185.69383239746094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2689545154571533, + "rewards_train/margins": -0.6995712518692017, + "rewards_train/rejected": -0.5693832635879517, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -186.03411865234375, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -106.8956527709961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10341186821460724, + "rewards_train/margins": -0.26384659111499786, + "rewards_train/rejected": 0.16043472290039062, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -39.87706756591797, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -78.8399658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.062293242663145065, + "rewards_train/margins": 0.046289823949337006, + "rewards_train/rejected": 0.01600341871380806, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -7.1855621337890625, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -3.5500035285949707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05918121337890625, + "rewards_train/margins": -0.07136836089193821, + "rewards_train/rejected": 0.01218714751303196, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -111.13931274414062, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -126.40667724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5139312744140625, + "rewards_train/margins": 1.0767364501953125, + "rewards_train/rejected": -1.590667724609375, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -70.60298156738281, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -115.72748565673828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01029815711081028, + "rewards_train/margins": -0.2875496093183756, + "rewards_train/rejected": 0.2772514522075653, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -147.22750854492188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -193.0, + "logps_train/rejected": -218.6759033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7227509021759033, + "rewards_train/margins": 0.8448395729064941, + "rewards_train/rejected": -2.5675904750823975, + "step": 288 + }, + { + "epoch": 0.08, + "logps_train/chosen": -13.734845161437988, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -10.245664596557617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1547345221042633, + "rewards_train/margins": 0.026081934571266174, + "rewards_train/rejected": -0.18081645667552948, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -63.52667999267578, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -63.13690185546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10266800224781036, + "rewards_train/margins": -0.038977816700935364, + "rewards_train/rejected": -0.063690185546875, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -54.33195495605469, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -81.06092834472656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2081954926252365, + "rewards_train/margins": -0.15210265666246414, + "rewards_train/rejected": -0.05609283596277237, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -12.612075805664062, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -6.982087135314941, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11745758354663849, + "rewards_train/margins": -0.09424887038767338, + "rewards_train/rejected": -0.02320871315896511, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.119669914245605, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -8.259071350097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08803301304578781, + "rewards_train/margins": 0.21706514805555344, + "rewards_train/rejected": -0.12903213500976562, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.0248966217041016, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -17.1478214263916, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.043114662170410156, + "rewards_train/margins": 0.07166748493909836, + "rewards_train/rejected": -0.11478214710950851, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -61.773521423339844, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -89.95941162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.022647857666015625, + "rewards_train/margins": 0.06858902052044868, + "rewards_train/rejected": -0.04594116285443306, + "step": 289 + }, + { + "epoch": 0.08, + "logps_train/chosen": -6.075902938842773, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -9.161544799804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023659706115722656, + "rewards_train/margins": 0.03356418665498495, + "rewards_train/rejected": -0.009904480539262295, + "step": 289 + }, + { + "epoch": 0.08, + "learning_rate": 1.9762761209803926e-06, + "loss": 0.6867, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -76.23493957519531, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -76.35144805908203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02349395863711834, + "rewards_train/margins": -0.1383491512387991, + "rewards_train/rejected": 0.11485519260168076, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -118.60244750976562, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -152.31231689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.43975526094436646, + "rewards_train/margins": 0.270986944437027, + "rewards_train/rejected": 0.16876831650733948, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -114.16015625, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -134.93728637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5660156607627869, + "rewards_train/margins": 1.2777130007743835, + "rewards_train/rejected": -1.8437286615371704, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.93355131149292, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -9.578097343444824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04101986810564995, + "rewards_train/margins": 0.3769545964896679, + "rewards_train/rejected": -0.33593472838401794, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -65.250732421875, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -65.51370239257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02492675743997097, + "rewards_train/margins": 0.0262969967443496, + "rewards_train/rejected": -0.0013702393043786287, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -87.51698303222656, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -81.06354522705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20169830322265625, + "rewards_train/margins": 1.004656195640564, + "rewards_train/rejected": -1.2063544988632202, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -10.03934383392334, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -3.1289567947387695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08518438786268234, + "rewards_train/margins": -0.11760120838880539, + "rewards_train/rejected": 0.03241682052612305, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -35.03256607055664, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -7.489750385284424, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24674339592456818, + "rewards_train/margins": 0.2832184359431267, + "rewards_train/rejected": -0.0364750400185585, + "step": 290 + }, + { + "epoch": 0.08, + "logps_train/chosen": -79.61568450927734, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -135.8111114501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3115684688091278, + "rewards_train/margins": 1.4195427000522614, + "rewards_train/rejected": -1.7311111688613892, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -105.27165222167969, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -108.7977523803711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8771652579307556, + "rewards_train/margins": -0.2473900318145752, + "rewards_train/rejected": -0.6297752261161804, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -92.77461242675781, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -80.84486389160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5774612426757812, + "rewards_train/margins": -0.24297484755516052, + "rewards_train/rejected": -0.3344863951206207, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -72.68865966796875, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -16.779342651367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.43113404512405396, + "rewards_train/margins": 0.7090683281421661, + "rewards_train/rejected": -0.2779342830181122, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -129.9736785888672, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -116.69219970703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5973678827285767, + "rewards_train/margins": -0.7281479239463806, + "rewards_train/rejected": -0.869219958782196, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.079676628112793, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -8.082634925842285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06078233942389488, + "rewards_train/margins": 0.1065458320081234, + "rewards_train/rejected": -0.045763492584228516, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -22.007606506347656, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -28.983478546142578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15076065063476562, + "rewards_train/margins": -0.052412793040275574, + "rewards_train/rejected": -0.09834785759449005, + "step": 291 + }, + { + "epoch": 0.08, + "logps_train/chosen": -109.81040954589844, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -128.16494750976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2810409665107727, + "rewards_train/margins": -0.014546215534210205, + "rewards_train/rejected": -0.2664947509765625, + "step": 291 + }, + { + "epoch": 0.08, + "learning_rate": 1.9756998657533806e-06, + "loss": 0.6158, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -130.04522705078125, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -123.63944244384766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5045228004455566, + "rewards_train/margins": -0.9405785799026489, + "rewards_train/rejected": -1.5639442205429077, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -16.149566650390625, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -22.727210998535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03504333645105362, + "rewards_train/margins": 0.007764436304569244, + "rewards_train/rejected": 0.027278900146484375, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -143.58250427246094, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -156.44491577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3417495787143707, + "rewards_train/margins": 0.28624115511775017, + "rewards_train/rejected": 0.05550842359662056, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -22.028179168701172, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -20.028440475463867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015317916870117188, + "rewards_train/margins": 0.02502613142132759, + "rewards_train/rejected": -0.04034404829144478, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -75.31964111328125, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -93.51947021484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08196411281824112, + "rewards_train/margins": -0.08001709135714918, + "rewards_train/rejected": -0.0019470214610919356, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -129.6988525390625, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -184.0851593017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21988525986671448, + "rewards_train/margins": 2.1886306703090668, + "rewards_train/rejected": -2.4085159301757812, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.046524047851562, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -8.614360809326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06090240553021431, + "rewards_train/margins": -0.030716324225068092, + "rewards_train/rejected": -0.030186081305146217, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -8.39978313446045, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -15.49083137512207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01627168618142605, + "rewards_train/margins": 0.10285482369363308, + "rewards_train/rejected": -0.08658313751220703, + "step": 292 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.670919418334961, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -8.699390411376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22021694481372833, + "rewards_train/margins": 0.05909709632396698, + "rewards_train/rejected": -0.2793140411376953, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -105.91346740722656, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -98.18182373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34134674072265625, + "rewards_train/margins": 0.8768357038497925, + "rewards_train/rejected": -1.2181824445724487, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -122.8384017944336, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -129.25408935546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8838402032852173, + "rewards_train/margins": -0.00843125581741333, + "rewards_train/rejected": -0.875408947467804, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -21.419281005859375, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -20.4201717376709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04557190090417862, + "rewards_train/margins": 0.07508907467126846, + "rewards_train/rejected": -0.029517173767089844, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -129.33444213867188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -189.8882293701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06655579060316086, + "rewards_train/margins": 2.655378870666027, + "rewards_train/rejected": -2.588823080062866, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -140.505859375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -153.4712371826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.550585925579071, + "rewards_train/margins": 0.1965377926826477, + "rewards_train/rejected": -0.7471237182617188, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -10.456263542175293, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -28.734455108642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14812365174293518, + "rewards_train/margins": 0.09656916186213493, + "rewards_train/rejected": 0.05155448988080025, + "step": 293 + }, + { + "epoch": 0.08, + "logps_train/chosen": -120.14765930175781, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -146.78587341308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4647659063339233, + "rewards_train/margins": 1.1638215780258179, + "rewards_train/rejected": -2.628587484359741, + "step": 293 + }, + { + "epoch": 0.08, + "learning_rate": 1.9751167816617853e-06, + "loss": 0.5838, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -117.73577117919922, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -118.06851196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1235771179199219, + "rewards_train/margins": 0.5832741260528564, + "rewards_train/rejected": -1.7068512439727783, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -72.61135864257812, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -149.1733856201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0388641357421875, + "rewards_train/margins": 2.156202793121338, + "rewards_train/rejected": -2.1173386573791504, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -82.06466674804688, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -155.11492919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19353333115577698, + "rewards_train/margins": 0.705026239156723, + "rewards_train/rejected": -0.511492908000946, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -17.268957138061523, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -4.865873336791992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.035604286938905716, + "rewards_train/margins": 0.09094162285327911, + "rewards_train/rejected": -0.0553373359143734, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -138.9283905029297, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -110.86030578613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4071609675884247, + "rewards_train/margins": 2.393191546201706, + "rewards_train/rejected": -1.9860305786132812, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -129.39283752441406, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -109.69755554199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3892837464809418, + "rewards_train/margins": 0.380471795797348, + "rewards_train/rejected": -0.7697555422782898, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -2.1649653911590576, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -13.382614135742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025690961629152298, + "rewards_train/margins": 0.15145238116383553, + "rewards_train/rejected": -0.12576141953468323, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -146.23391723632812, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -139.42477416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12339172512292862, + "rewards_train/margins": 1.3190857395529747, + "rewards_train/rejected": -1.4424774646759033, + "step": 294 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.979045867919922, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -3.5717899799346924, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08647041767835617, + "rewards_train/margins": 0.14833691716194153, + "rewards_train/rejected": -0.06186649948358536, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -21.72536849975586, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -11.218599319458008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010036850348114967, + "rewards_train/margins": 0.13682307861745358, + "rewards_train/rejected": -0.14685992896556854, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -2.3756496906280518, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -5.502499103546143, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015560030937194824, + "rewards_train/margins": 0.03455994091928005, + "rewards_train/rejected": -0.018999909982085228, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -79.7044677734375, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -110.10515594482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47044679522514343, + "rewards_train/margins": 0.9400688707828522, + "rewards_train/rejected": -1.4105156660079956, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.3466272354125977, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -6.6633687019348145, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0012747765285894275, + "rewards_train/margins": 0.08323664672207087, + "rewards_train/rejected": -0.08196187019348145, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -107.73593139648438, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -128.55807495117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1764068603515625, + "rewards_train/margins": 2.08221435546875, + "rewards_train/rejected": -1.9058074951171875, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -106.09541320800781, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -120.6336669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20954132080078125, + "rewards_train/margins": 0.05382537841796875, + "rewards_train/rejected": -0.26336669921875, + "step": 295 + }, + { + "epoch": 0.08, + "logps_train/chosen": -27.82179832458496, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -25.004154205322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39282017946243286, + "rewards_train/margins": 0.43073559924960136, + "rewards_train/rejected": -0.0379154197871685, + "step": 295 + }, + { + "epoch": 0.08, + "learning_rate": 1.974526872786577e-06, + "loss": 0.4537, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -70.10903930664062, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -87.33775329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06090393289923668, + "rewards_train/margins": 1.572871420532465, + "rewards_train/rejected": -1.6337753534317017, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -50.22496795654297, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -31.29449462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5474967956542969, + "rewards_train/margins": 0.06945270299911499, + "rewards_train/rejected": -0.6169494986534119, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -181.62936401367188, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -118.34871673583984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7629363536834717, + "rewards_train/margins": -1.1780647039413452, + "rewards_train/rejected": -1.5848716497421265, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.41237276792526245, + "logps_train/ref_chosen": -0.58203125, + "logps_train/ref_rejected": -0.58203125, + "logps_train/rejected": -0.4201655387878418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016965849325060844, + "rewards_train/margins": 0.000779278576374054, + "rewards_train/rejected": 0.01618657074868679, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -143.04356384277344, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -233.1817169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6043564081192017, + "rewards_train/margins": 2.4138153791427612, + "rewards_train/rejected": -3.018171787261963, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -5.849542140960693, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -10.757022857666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.038079213351011276, + "rewards_train/margins": 0.025123070925474167, + "rewards_train/rejected": -0.06320228427648544, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -157.681396484375, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -173.11180114746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3681396543979645, + "rewards_train/margins": 0.14304044842720032, + "rewards_train/rejected": -0.5111801028251648, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -37.17463684082031, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -10.214753150939941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03253631666302681, + "rewards_train/margins": 0.1977616287767887, + "rewards_train/rejected": -0.1652253121137619, + "step": 296 + }, + { + "epoch": 0.08, + "logps_train/chosen": -79.86090850830078, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -35.444740295410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21390914916992188, + "rewards_train/margins": 0.43338318169116974, + "rewards_train/rejected": -0.21947403252124786, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -130.1585693359375, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -109.30182647705078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9658569693565369, + "rewards_train/margins": -0.18567430973052979, + "rewards_train/rejected": -0.7801826596260071, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -22.492034912109375, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -20.18729591369629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08670349419116974, + "rewards_train/margins": 0.09452609717845917, + "rewards_train/rejected": -0.1812295913696289, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -26.84185791015625, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -26.803178787231445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05918579176068306, + "rewards_train/margins": -0.0038679130375385284, + "rewards_train/rejected": -0.05531787872314453, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.4774396419525146, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -8.360575675964355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01415021438151598, + "rewards_train/margins": 0.12503235321491957, + "rewards_train/rejected": -0.13918256759643555, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -31.626951217651367, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -56.903656005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02519512176513672, + "rewards_train/margins": 0.3151704967021942, + "rewards_train/rejected": -0.34036561846733093, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -76.39700317382812, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -129.93833923339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03970031812787056, + "rewards_train/margins": 3.254133652895689, + "rewards_train/rejected": -3.2938339710235596, + "step": 297 + }, + { + "epoch": 0.08, + "logps_train/chosen": -36.1470832824707, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -38.495513916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03529167175292969, + "rewards_train/margins": 0.7223430871963501, + "rewards_train/rejected": -0.6870514154434204, + "step": 297 + }, + { + "epoch": 0.08, + "learning_rate": 1.9739301432564926e-06, + "loss": 0.5762, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -160.15655517578125, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -71.44140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18434448540210724, + "rewards_train/margins": 0.3284851163625717, + "rewards_train/rejected": -0.14414063096046448, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -116.3205337524414, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -108.65316009521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7820534110069275, + "rewards_train/margins": 0.3332626223564148, + "rewards_train/rejected": -1.1153160333633423, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -13.32763671875, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -12.262537002563477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04848632961511612, + "rewards_train/margins": 0.018490029498934746, + "rewards_train/rejected": 0.029996300116181374, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -11.110727310180664, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -15.765352249145508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.042322732508182526, + "rewards_train/margins": 0.17796248942613602, + "rewards_train/rejected": -0.22028522193431854, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.3867483139038086, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -14.332755088806152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0035126686561852694, + "rewards_train/margins": 0.024288178654387593, + "rewards_train/rejected": -0.020775509998202324, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -12.291725158691406, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -53.51390838623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07082748413085938, + "rewards_train/margins": 0.17221832275390625, + "rewards_train/rejected": -0.10139083862304688, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -59.291526794433594, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -119.19523620605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1708473265171051, + "rewards_train/margins": 3.0903710424900055, + "rewards_train/rejected": -2.9195237159729004, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -14.330018043518066, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -4.766314506530762, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.089251808822155, + "rewards_train/margins": -0.11574535816907883, + "rewards_train/rejected": 0.026493549346923828, + "step": 298 + }, + { + "epoch": 0.08, + "logps_train/chosen": -38.414306640625, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -90.94798278808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2585693299770355, + "rewards_train/margins": 0.30336761102080345, + "rewards_train/rejected": -0.04479828104376793, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -74.23309326171875, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -94.86480712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.423309326171875, + "rewards_train/margins": 0.7131713628768921, + "rewards_train/rejected": -1.136480689048767, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -214.08349609375, + "logps_train/ref_chosen": -215.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -171.15792846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09165038913488388, + "rewards_train/margins": 0.8074432238936424, + "rewards_train/rejected": -0.7157928347587585, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.0747697353363037, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -3.909905195236206, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03872697427868843, + "rewards_train/margins": -0.008673954755067825, + "rewards_train/rejected": -0.030053019523620605, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -9.115863800048828, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -6.4036865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05716362223029137, + "rewards_train/margins": 0.03503227420151234, + "rewards_train/rejected": 0.02213134802877903, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.54950213432312, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -5.448266983032227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09192478656768799, + "rewards_train/margins": 0.07112648524343967, + "rewards_train/rejected": 0.020798301324248314, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -19.246871948242188, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -5.077519416809082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012187195010483265, + "rewards_train/margins": 0.11431475263088942, + "rewards_train/rejected": -0.12650194764137268, + "step": 299 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.6838488578796387, + "logps_train/ref_chosen": -1.6953125, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -7.909309387207031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0011463642586022615, + "rewards_train/margins": -0.004797697300091386, + "rewards_train/rejected": 0.005944061558693647, + "step": 299 + }, + { + "epoch": 0.08, + "learning_rate": 1.9733265972480058e-06, + "loss": 0.5719, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -74.74983215332031, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -167.32174682617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1749832183122635, + "rewards_train/margins": 2.1571915596723557, + "rewards_train/rejected": -2.332174777984619, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -129.00489807128906, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -200.37240600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.45048987865448, + "rewards_train/margins": 3.086750626564026, + "rewards_train/rejected": -4.537240505218506, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -69.71128845214844, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -80.95661926269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07112884521484375, + "rewards_train/margins": -0.0254669189453125, + "rewards_train/rejected": -0.04566192626953125, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.0264025926589966, + "logps_train/ref_chosen": -1.3671875, + "logps_train/ref_rejected": -2.796875, + "logps_train/rejected": -2.8055999279022217, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03407848998904228, + "rewards_train/margins": 0.03495098277926445, + "rewards_train/rejected": -0.000872492790222168, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -80.08786010742188, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -118.2705307006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.308786004781723, + "rewards_train/margins": 0.4182671010494232, + "rewards_train/rejected": -0.7270531058311462, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -135.76939392089844, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -157.25302124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.576939344406128, + "rewards_train/margins": 0.24836277961730957, + "rewards_train/rejected": -2.8253021240234375, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -22.04362678527832, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -44.57876205444336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03313732147216797, + "rewards_train/margins": -0.033986471593379974, + "rewards_train/rejected": 0.06712379306554794, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -17.72179412841797, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -16.88431739807129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.072179414331913, + "rewards_train/margins": 0.04750232398509979, + "rewards_train/rejected": -0.11968173831701279, + "step": 300 + }, + { + "epoch": 0.08, + "logps_train/chosen": -85.50338745117188, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -99.72048950195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35033875703811646, + "rewards_train/margins": 0.021710187196731567, + "rewards_train/rejected": -0.372048944234848, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -3.5388853549957275, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -0.82421875, + "logps_train/rejected": -0.933472752571106, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06482603400945663, + "rewards_train/margins": -0.053900633938610554, + "rewards_train/rejected": -0.01092540007084608, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -61.98994827270508, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -93.99269104003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.42399483919143677, + "rewards_train/margins": -0.22472573816776276, + "rewards_train/rejected": -0.199269101023674, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -92.93060302734375, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -82.48832702636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15693970024585724, + "rewards_train/margins": 0.10577240213751793, + "rewards_train/rejected": 0.05116729810833931, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -86.92755126953125, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -113.07627868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.542755126953125, + "rewards_train/margins": 0.5648727416992188, + "rewards_train/rejected": -1.1076278686523438, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -29.84615707397461, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -26.700565338134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027884293347597122, + "rewards_train/margins": 0.33544083312153816, + "rewards_train/rejected": -0.30755653977394104, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -85.11414337158203, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -121.15925598144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2614143490791321, + "rewards_train/margins": 0.2545112371444702, + "rewards_train/rejected": -0.5159255862236023, + "step": 301 + }, + { + "epoch": 0.08, + "logps_train/chosen": -88.56121826171875, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -88.67710876464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.556121826171875, + "rewards_train/margins": 0.01158905029296875, + "rewards_train/rejected": -0.5677108764648438, + "step": 301 + }, + { + "epoch": 0.08, + "learning_rate": 1.972716238985299e-06, + "loss": 0.5691, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -86.89353942871094, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -182.26705932617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06064605712890625, + "rewards_train/margins": 0.7873520255088806, + "rewards_train/rejected": -0.7267059683799744, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -50.41429901123047, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -87.67742919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3914299011230469, + "rewards_train/margins": 0.226313054561615, + "rewards_train/rejected": -0.6177429556846619, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -10.544561386108398, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -29.744691848754883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06429386138916016, + "rewards_train/margins": 0.3137630522251129, + "rewards_train/rejected": -0.24946919083595276, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -4.036253929138184, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -6.028017997741699, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03387460857629776, + "rewards_train/margins": 0.12105140835046768, + "rewards_train/rejected": -0.08717679977416992, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -79.08322143554688, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -106.63697814941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.19167785346508026, + "rewards_train/margins": -0.09462432563304901, + "rewards_train/rejected": 0.2863021790981293, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -172.5408935546875, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -182.96852111816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.354089379310608, + "rewards_train/margins": -0.15723729133605957, + "rewards_train/rejected": -1.1968520879745483, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -96.5408935546875, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -87.17562103271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00408935546875, + "rewards_train/margins": 0.8134727478027344, + "rewards_train/rejected": -0.8175621032714844, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.7275245189666748, + "logps_train/ref_chosen": -0.7578125, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -4.309628009796143, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0030287981498986483, + "rewards_train/margins": 0.057429100619629025, + "rewards_train/rejected": -0.05440030246973038, + "step": 302 + }, + { + "epoch": 0.08, + "logps_train/chosen": -89.35289001464844, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -148.2635955810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16471099853515625, + "rewards_train/margins": 2.8410706520080566, + "rewards_train/rejected": -2.6763596534729004, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -11.811470985412598, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -56.479576110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0061470987275242805, + "rewards_train/margins": 0.016810513101518154, + "rewards_train/rejected": -0.022957611829042435, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -125.070068359375, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -153.85220336914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25700685381889343, + "rewards_train/margins": 0.628213495016098, + "rewards_train/rejected": -0.8852203488349915, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -56.679134368896484, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -94.98675537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18208657205104828, + "rewards_train/margins": 1.6307620853185654, + "rewards_train/rejected": -1.448675513267517, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.710459589958191, + "logps_train/ref_chosen": -1.1328125, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -7.47280216217041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05776470899581909, + "rewards_train/margins": 0.042640507221221924, + "rewards_train/rejected": -0.10040521621704102, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -137.98361206054688, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -159.36471557617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4483612775802612, + "rewards_train/margins": -0.6118897199630737, + "rewards_train/rejected": -0.8364715576171875, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -1.2368991374969482, + "logps_train/ref_chosen": -1.2421875, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -5.552702903747559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0005288362735882401, + "rewards_train/margins": 0.14642412366811186, + "rewards_train/rejected": -0.14589528739452362, + "step": 303 + }, + { + "epoch": 0.08, + "logps_train/chosen": -172.21556091308594, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -166.61282348632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17844390869140625, + "rewards_train/margins": 0.4397262632846832, + "rewards_train/rejected": -0.261282354593277, + "step": 303 + }, + { + "epoch": 0.08, + "learning_rate": 1.9720990727402324e-06, + "loss": 0.5547, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -91.13716125488281, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -94.30995178222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08628387749195099, + "rewards_train/margins": 0.9172790795564651, + "rewards_train/rejected": -0.8309952020645142, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -0.3437339663505554, + "logps_train/ref_chosen": -0.451171875, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -4.4649658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010743791237473488, + "rewards_train/margins": 0.09630287624895573, + "rewards_train/rejected": -0.08555908501148224, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -48.24297332763672, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -12.440957069396973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.42570266127586365, + "rewards_train/margins": 0.7447983622550964, + "rewards_train/rejected": -0.3190957009792328, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -5.791676044464111, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -10.513467788696289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04479260370135307, + "rewards_train/margins": 0.03780417516827583, + "rewards_train/rejected": -0.0825967788696289, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -52.32560348510742, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -20.830482482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14243964850902557, + "rewards_train/margins": 0.2879879027605057, + "rewards_train/rejected": -0.1455482542514801, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -11.506150245666504, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -11.478496551513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05686502531170845, + "rewards_train/margins": 0.022234629839658737, + "rewards_train/rejected": -0.07909965515136719, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -24.938167572021484, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -24.78963851928711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10631676018238068, + "rewards_train/margins": -0.01485290378332138, + "rewards_train/rejected": -0.0914638563990593, + "step": 304 + }, + { + "epoch": 0.08, + "logps_train/chosen": -31.806638717651367, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -65.18225860595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2806638777256012, + "rewards_train/margins": 0.08756199479103088, + "rewards_train/rejected": -0.3682258725166321, + "step": 304 + }, + { + "epoch": 0.09, + "logps_train/chosen": -69.67945861816406, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -91.84862518310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03205413743853569, + "rewards_train/margins": 0.2669166587293148, + "rewards_train/rejected": -0.2348625212907791, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -171.56288146972656, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -204.44374084472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.856288194656372, + "rewards_train/margins": 1.1880860328674316, + "rewards_train/rejected": -3.0443742275238037, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -13.194934844970703, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -15.01381778717041, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5694934725761414, + "rewards_train/margins": -0.018111705780029297, + "rewards_train/rejected": -0.5513817667961121, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.931014060974121, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -11.976423263549805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013148593716323376, + "rewards_train/margins": 0.11079091858118773, + "rewards_train/rejected": -0.09764232486486435, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -50.08749008178711, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -72.22117614746094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15874901413917542, + "rewards_train/margins": -0.0866313949227333, + "rewards_train/rejected": -0.07211761921644211, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -107.23919677734375, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -93.52982330322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.423919677734375, + "rewards_train/margins": 1.3290627002716064, + "rewards_train/rejected": -1.7529823780059814, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -9.296463012695312, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -8.175191879272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04839630052447319, + "rewards_train/margins": 0.1441228874027729, + "rewards_train/rejected": -0.1925191879272461, + "step": 305 + }, + { + "epoch": 0.09, + "logps_train/chosen": -45.618316650390625, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -29.626100540161133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31316834688186646, + "rewards_train/margins": 0.6382783949375153, + "rewards_train/rejected": -0.3251100480556488, + "step": 305 + }, + { + "epoch": 0.09, + "learning_rate": 1.9714751028323164e-06, + "loss": 0.5518, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -69.59605407714844, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -62.228302001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05960540845990181, + "rewards_train/margins": 0.1382247917354107, + "rewards_train/rejected": -0.1978302001953125, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -9.112894058227539, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -2.810389995574951, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03246059641242027, + "rewards_train/margins": 0.08849959820508957, + "rewards_train/rejected": -0.056039001792669296, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -77.8060302734375, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -79.94599914550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01939697377383709, + "rewards_train/margins": 0.16399688832461834, + "rewards_train/rejected": -0.14459991455078125, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -38.129005432128906, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -135.64620971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01290054339915514, + "rewards_train/margins": 0.15172043722122908, + "rewards_train/rejected": -0.16462098062038422, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -59.71234130859375, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -73.51309204101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3962341248989105, + "rewards_train/margins": 0.00507509708404541, + "rewards_train/rejected": -0.40130922198295593, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -15.927370071411133, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -11.46605396270752, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06773700565099716, + "rewards_train/margins": -0.10238160938024521, + "rewards_train/rejected": 0.03464460372924805, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -26.33545684814453, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -63.709144592285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.37895432114601135, + "rewards_train/margins": 0.049868762493133545, + "rewards_train/rejected": 0.3290855586528778, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -157.43939208984375, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -194.0, + "logps_train/rejected": -228.83447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.543939232826233, + "rewards_train/margins": 1.939508080482483, + "rewards_train/rejected": -3.483447313308716, + "step": 306 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.3963680267334, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -15.634550094604492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15838681161403656, + "rewards_train/margins": 0.030068203806877136, + "rewards_train/rejected": -0.1884550154209137, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -2.2540996074676514, + "logps_train/ref_chosen": -0.84375, + "logps_train/ref_rejected": -1.8671875, + "logps_train/rejected": -1.256052017211914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14103496074676514, + "rewards_train/margins": -0.20214850828051567, + "rewards_train/rejected": 0.061113547533750534, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -130.14068603515625, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -191.36773681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.564068615436554, + "rewards_train/margins": 1.972705066204071, + "rewards_train/rejected": -2.536773681640625, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -89.74481201171875, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -83.362060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17551879584789276, + "rewards_train/margins": 0.361724853515625, + "rewards_train/rejected": -0.18620605766773224, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -2.8883979320526123, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -1.4140625, + "logps_train/rejected": -1.2727686166763306, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07633979618549347, + "rewards_train/margins": -0.0904691843315959, + "rewards_train/rejected": 0.014129388146102428, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -22.42609405517578, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -27.48954200744629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0051094056107103825, + "rewards_train/margins": 0.08134479960426688, + "rewards_train/rejected": -0.08645420521497726, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.493417739868164, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -20.078927993774414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09934177249670029, + "rewards_train/margins": -0.003948971629142761, + "rewards_train/rejected": -0.09539280086755753, + "step": 307 + }, + { + "epoch": 0.09, + "logps_train/chosen": -86.554443359375, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -127.97174072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14455567300319672, + "rewards_train/margins": 1.8417297452688217, + "rewards_train/rejected": -1.697174072265625, + "step": 307 + }, + { + "epoch": 0.09, + "learning_rate": 1.970844333628678e-06, + "loss": 0.5705, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.883018493652344, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -21.391626358032227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036698151379823685, + "rewards_train/margins": 0.1258607916533947, + "rewards_train/rejected": -0.08916264027357101, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -29.736698150634766, + "logps_train/ref_chosen": -30.25, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -25.694746017456055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05133018642663956, + "rewards_train/margins": 0.1833047941327095, + "rewards_train/rejected": -0.13197460770606995, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -75.54313659667969, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -87.2763900756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1456863433122635, + "rewards_train/margins": 0.07332535088062286, + "rewards_train/rejected": 0.07236099243164062, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -103.30657958984375, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -98.53805541992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.580657958984375, + "rewards_train/margins": 0.5731476545333862, + "rewards_train/rejected": -1.1538056135177612, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -55.72383117675781, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -63.423988342285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04738311842083931, + "rewards_train/margins": -0.07998428493738174, + "rewards_train/rejected": 0.032601166516542435, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.093257904052734, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -10.215079307556152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04370079189538956, + "rewards_train/margins": 0.0965571478009224, + "rewards_train/rejected": -0.14025793969631195, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -133.243896484375, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -137.5478057861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2243897914886475, + "rewards_train/margins": 0.4303908348083496, + "rewards_train/rejected": -2.654780626296997, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -168.91505432128906, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -158.32424926757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09150543063879013, + "rewards_train/margins": 0.24091950803995132, + "rewards_train/rejected": -0.33242493867874146, + "step": 308 + }, + { + "epoch": 0.09, + "logps_train/chosen": -87.63151550292969, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -102.06137084960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01315155066549778, + "rewards_train/margins": 0.49298552237451077, + "rewards_train/rejected": -0.5061370730400085, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -172.74099731445312, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -139.52359008789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8740997314453125, + "rewards_train/margins": -0.2717406749725342, + "rewards_train/rejected": -1.6023590564727783, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -79.8660659790039, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -99.41325378417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08660659939050674, + "rewards_train/margins": 0.154718779027462, + "rewards_train/rejected": -0.24132537841796875, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -81.41281127929688, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -98.41580200195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05871887132525444, + "rewards_train/margins": 0.10029907152056694, + "rewards_train/rejected": -0.0415802001953125, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -104.6275863647461, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -77.6819839477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1872413605451584, + "rewards_train/margins": 0.4554397612810135, + "rewards_train/rejected": -0.2681984007358551, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.877931118011475, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -13.12360668182373, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0846681147813797, + "rewards_train/margins": 0.17144255340099335, + "rewards_train/rejected": -0.25611066818237305, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -32.9736328125, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -8.963258743286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04736328125, + "rewards_train/margins": 0.19271259009838104, + "rewards_train/rejected": -0.24007587134838104, + "step": 309 + }, + { + "epoch": 0.09, + "logps_train/chosen": -94.35623168945312, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -125.73310852050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4356231689453125, + "rewards_train/margins": 1.1376876831054688, + "rewards_train/rejected": -1.5733108520507812, + "step": 309 + }, + { + "epoch": 0.09, + "learning_rate": 1.970206769544033e-06, + "loss": 0.585, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -61.48622131347656, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -119.58779907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09862213581800461, + "rewards_train/margins": 0.9101577475667, + "rewards_train/rejected": -1.0087798833847046, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -165.8812713623047, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -185.36117553710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6881271600723267, + "rewards_train/margins": 0.047990381717681885, + "rewards_train/rejected": -0.7361175417900085, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -18.029142379760742, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -64.36835479736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0845857635140419, + "rewards_train/margins": 0.6464212313294411, + "rewards_train/rejected": -0.5618354678153992, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -130.61141967773438, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -130.88153076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5611419677734375, + "rewards_train/margins": 0.37701112031936646, + "rewards_train/rejected": -0.938153088092804, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -98.3172607421875, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -145.29446411132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16827392578125, + "rewards_train/margins": 0.6977203488349915, + "rewards_train/rejected": -0.5294464230537415, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -13.344236373901367, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -10.896620750427246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09057636559009552, + "rewards_train/margins": 0.07398843951523304, + "rewards_train/rejected": 0.01658792607486248, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.090195655822754, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -4.622269153594971, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03160543367266655, + "rewards_train/margins": 0.15164485201239586, + "rewards_train/rejected": -0.12003941833972931, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -16.75162124633789, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -24.003557205200195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03766212612390518, + "rewards_train/margins": 0.18769360333681107, + "rewards_train/rejected": -0.22535572946071625, + "step": 310 + }, + { + "epoch": 0.09, + "logps_train/chosen": -91.47395324707031, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -134.6640167236328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09739532321691513, + "rewards_train/margins": -0.23099365085363388, + "rewards_train/rejected": 0.13359832763671875, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -126.78901672363281, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -165.91940307617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6289016604423523, + "rewards_train/margins": 0.36303865909576416, + "rewards_train/rejected": -0.9919403195381165, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -85.07208251953125, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -128.5800018310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.142791748046875, + "rewards_train/margins": -0.09920807182788849, + "rewards_train/rejected": 0.2419998198747635, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -136.98048400878906, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -174.49562072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7019516229629517, + "rewards_train/margins": 1.151513695716858, + "rewards_train/rejected": -0.44956207275390625, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.622293472290039, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -13.046269416809082, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.049729347229003906, + "rewards_train/margins": -0.05135240557137877, + "rewards_train/rejected": 0.0016230583423748612, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -161.85012817382812, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -125.32682800292969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1850128173828125, + "rewards_train/margins": -0.10233001410961151, + "rewards_train/rejected": -0.08268280327320099, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -176.21893310546875, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -152.64111328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.021893262863159, + "rewards_train/margins": -1.0577819347381592, + "rewards_train/rejected": -0.964111328125, + "step": 311 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.6021209955215454, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -3.542463541030884, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02193084917962551, + "rewards_train/margins": -0.06299699656665325, + "rewards_train/rejected": 0.04106614738702774, + "step": 311 + }, + { + "epoch": 0.09, + "learning_rate": 1.969562415040654e-06, + "loss": 0.6342, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.686124801635742, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -14.76402473449707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006387520115822554, + "rewards_train/margins": 0.13903999654576182, + "rewards_train/rejected": -0.13265247642993927, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -140.82452392578125, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -182.73365783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.782452404499054, + "rewards_train/margins": 0.2909134030342102, + "rewards_train/rejected": -1.0733658075332642, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -93.35124206542969, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -58.9476432800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01487579382956028, + "rewards_train/margins": 0.3096401337534189, + "rewards_train/rejected": -0.29476433992385864, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -9.253278732299805, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -3.78804874420166, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09342212975025177, + "rewards_train/margins": 0.20191450417041779, + "rewards_train/rejected": -0.10849237442016602, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -16.395145416259766, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -24.375608444213867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.010485458187758923, + "rewards_train/margins": -0.014453697018325329, + "rewards_train/rejected": 0.02493915520608425, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -77.9571762084961, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -103.06077575683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3542823791503906, + "rewards_train/margins": 1.9603599309921265, + "rewards_train/rejected": -1.6060775518417358, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -108.82810974121094, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -111.31256103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01718902587890625, + "rewards_train/margins": 0.24844513833522797, + "rewards_train/rejected": -0.23125611245632172, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -101.9190673828125, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -98.35031127929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.39190673828125, + "rewards_train/margins": -0.05687558650970459, + "rewards_train/rejected": -1.3350311517715454, + "step": 312 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.352811813354492, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -10.176823616027832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004031181335449219, + "rewards_train/margins": 0.026151180267333984, + "rewards_train/rejected": -0.030182361602783203, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -2.888119697570801, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -2.836146354675293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01587553136050701, + "rewards_train/margins": 0.008865166921168566, + "rewards_train/rejected": 0.007010364439338446, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -137.05055236816406, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -122.75468444824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10505523532629013, + "rewards_train/margins": 0.07041320949792862, + "rewards_train/rejected": -0.17546844482421875, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -190.9625244140625, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -184.22091674804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2962524890899658, + "rewards_train/margins": -0.3741608262062073, + "rewards_train/rejected": -0.9220916628837585, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -119.3635025024414, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -127.62449645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11364974826574326, + "rewards_train/margins": 0.776099406182766, + "rewards_train/rejected": -0.6624496579170227, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -115.74867248535156, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -158.61325073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1748672723770142, + "rewards_train/margins": 2.786457896232605, + "rewards_train/rejected": -3.961325168609619, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -16.652202606201172, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -48.87235641479492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07227974385023117, + "rewards_train/margins": 0.0345153845846653, + "rewards_train/rejected": 0.03776435926556587, + "step": 313 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.1089019775390625, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -24.750947952270508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07026519626379013, + "rewards_train/margins": -0.02017040178179741, + "rewards_train/rejected": -0.05009479448199272, + "step": 313 + }, + { + "epoch": 0.09, + "learning_rate": 1.9689112746283396e-06, + "loss": 0.5754, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -152.93020629882812, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -214.09335327148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19302062690258026, + "rewards_train/margins": 2.5163147002458572, + "rewards_train/rejected": -2.7093353271484375, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -12.915491104125977, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -13.470311164855957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24154911935329437, + "rewards_train/margins": -0.08201800286769867, + "rewards_train/rejected": -0.1595311164855957, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -105.00286865234375, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -158.83624267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15028686821460724, + "rewards_train/margins": 3.1833374947309494, + "rewards_train/rejected": -3.3336243629455566, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -61.697349548339844, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -94.52631378173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1447349637746811, + "rewards_train/margins": 0.907896414399147, + "rewards_train/rejected": -1.0526313781738281, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -111.82462310791016, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -164.5445098876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4824623167514801, + "rewards_train/margins": 3.721988767385483, + "rewards_train/rejected": -4.204451084136963, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -23.420042037963867, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -17.550928115844727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08299579471349716, + "rewards_train/margins": 0.06308860518038273, + "rewards_train/rejected": 0.019907189533114433, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -19.48554801940918, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -122.96153259277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19855479896068573, + "rewards_train/margins": -0.3024015426635742, + "rewards_train/rejected": 0.10384674370288849, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -102.30194091796875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -148.60629272460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6301941275596619, + "rewards_train/margins": 1.3304352164268494, + "rewards_train/rejected": -1.9606293439865112, + "step": 314 + }, + { + "epoch": 0.09, + "logps_train/chosen": -74.9974136352539, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -128.72398376464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3002586364746094, + "rewards_train/margins": 3.122657060623169, + "rewards_train/rejected": -2.8223984241485596, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -105.13339233398438, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -147.58668518066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7633392214775085, + "rewards_train/margins": 0.3953292965888977, + "rewards_train/rejected": -1.1586685180664062, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -12.26225757598877, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -14.667101860046387, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03247575834393501, + "rewards_train/margins": -0.06576557457447052, + "rewards_train/rejected": 0.03328981623053551, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -149.81060791015625, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -137.52719116210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5810608267784119, + "rewards_train/margins": 1.1716583371162415, + "rewards_train/rejected": -1.7527191638946533, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -26.4383544921875, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -24.465585708618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2936645448207855, + "rewards_train/margins": 0.3527231179177761, + "rewards_train/rejected": -0.059058573096990585, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -0.7896074056625366, + "logps_train/ref_chosen": -1.2734375, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -7.212647914886475, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04838300868868828, + "rewards_train/margins": 0.35089780017733574, + "rewards_train/rejected": -0.30251479148864746, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -7.984255313873291, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -13.450118064880371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02030053175985813, + "rewards_train/margins": 0.1997112836688757, + "rewards_train/rejected": -0.22001181542873383, + "step": 315 + }, + { + "epoch": 0.09, + "logps_train/chosen": -9.350034713745117, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -9.87861442565918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05375347286462784, + "rewards_train/margins": 0.10285796970129013, + "rewards_train/rejected": -0.15661144256591797, + "step": 315 + }, + { + "epoch": 0.09, + "learning_rate": 1.968253352864382e-06, + "loss": 0.4268, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -79.98663330078125, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -150.41098022460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6486634016036987, + "rewards_train/margins": 1.6424347162246704, + "rewards_train/rejected": -3.291098117828369, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.723627090454102, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -26.64828109741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016112709417939186, + "rewards_train/margins": 0.18621540628373623, + "rewards_train/rejected": -0.20232811570167542, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -164.319091796875, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -169.96340942382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0319092273712158, + "rewards_train/margins": 0.9644317626953125, + "rewards_train/rejected": -1.9963409900665283, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -80.86666107177734, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -81.34444427490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2133338898420334, + "rewards_train/margins": 0.047778308391571045, + "rewards_train/rejected": 0.16555558145046234, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -2.4560935497283936, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -2.605313777923584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027828145772218704, + "rewards_train/margins": 0.0492970235645771, + "rewards_train/rejected": -0.0214688777923584, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -137.21617126464844, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -153.75091552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5216171145439148, + "rewards_train/margins": 1.3534744381904602, + "rewards_train/rejected": -1.875091552734375, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.410858154296875, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -9.089278221130371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07141418755054474, + "rewards_train/margins": 0.22721701860427856, + "rewards_train/rejected": -0.15580283105373383, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -103.1676254272461, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -101.74097442626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.366762638092041, + "rewards_train/margins": 0.00733494758605957, + "rewards_train/rejected": -2.3740975856781006, + "step": 316 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.296600341796875, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -11.947286605834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07658996433019638, + "rewards_train/margins": 0.058818625286221504, + "rewards_train/rejected": 0.017771339043974876, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.092967987060547, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -4.297579765319824, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07179679721593857, + "rewards_train/margins": -0.12953882291913033, + "rewards_train/rejected": 0.05774202570319176, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -10.543394088745117, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -22.77796173095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008160591125488281, + "rewards_train/margins": 0.02345676440745592, + "rewards_train/rejected": -0.01529617328196764, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -65.4957504272461, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -120.80634307861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0004249572812113911, + "rewards_train/margins": 1.4310593366681132, + "rewards_train/rejected": -1.4306343793869019, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -16.975500106811523, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -6.719640254974365, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.022550011053681374, + "rewards_train/margins": 0.09316401742398739, + "rewards_train/rejected": -0.11571402847766876, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.74744987487793, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -9.921833038330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02213001251220703, + "rewards_train/margins": -0.023186683654785156, + "rewards_train/rejected": 0.04531669616699219, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.001136541366577, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -1.953125, + "logps_train/rejected": -2.1068053245544434, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.025113655254244804, + "rewards_train/margins": -0.009745622985064983, + "rewards_train/rejected": -0.015368032269179821, + "step": 317 + }, + { + "epoch": 0.09, + "logps_train/chosen": -85.55023956298828, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -133.36187744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005023956298828125, + "rewards_train/margins": 2.931163787841797, + "rewards_train/rejected": -2.936187744140625, + "step": 317 + }, + { + "epoch": 0.09, + "learning_rate": 1.9675886543535365e-06, + "loss": 0.5231, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.447883605957031, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -30.40155601501465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011461639776825905, + "rewards_train/margins": 0.1516172382980585, + "rewards_train/rejected": -0.1401555985212326, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -7.477597713470459, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -6.663707733154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0008847713470458984, + "rewards_train/margins": 0.06236100196838379, + "rewards_train/rejected": -0.06324577331542969, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.895384311676025, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -30.30229949951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03328843042254448, + "rewards_train/margins": 0.0969415195286274, + "rewards_train/rejected": -0.13022994995117188, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -48.89872360229492, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -8.947599411010742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1851276457309723, + "rewards_train/margins": 0.383012592792511, + "rewards_train/rejected": -0.1978849470615387, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -143.92198181152344, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -208.96240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.392198324203491, + "rewards_train/margins": 2.304042100906372, + "rewards_train/rejected": -4.696240425109863, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.997599720954895, + "logps_train/ref_chosen": -1.9296875, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -23.883983612060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006791222374886274, + "rewards_train/margins": 0.28160713287070394, + "rewards_train/rejected": -0.2883983552455902, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -128.75099182128906, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -135.8482208251953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.37509918212890625, + "rewards_train/margins": -0.190277099609375, + "rewards_train/rejected": -0.18482208251953125, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -153.947509765625, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -144.64720153808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2947510480880737, + "rewards_train/margins": 0.4199690818786621, + "rewards_train/rejected": -1.7147201299667358, + "step": 318 + }, + { + "epoch": 0.09, + "logps_train/chosen": -164.86981201171875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -131.19664001464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.186981201171875, + "rewards_train/margins": -1.1173171997070312, + "rewards_train/rejected": -2.0696640014648438, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -0.9157920479774475, + "logps_train/ref_chosen": -1.1796875, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -11.718670845031738, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02638954482972622, + "rewards_train/margins": 0.09200662933290005, + "rewards_train/rejected": -0.06561708450317383, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -111.7329330444336, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -129.14743041992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22670669853687286, + "rewards_train/margins": 0.8914497643709183, + "rewards_train/rejected": -0.6647430658340454, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -141.4100341796875, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -161.22967529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4410034120082855, + "rewards_train/margins": 1.6819640696048737, + "rewards_train/rejected": -2.122967481613159, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -8.81705093383789, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -26.363590240478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07545509189367294, + "rewards_train/margins": 0.14840393513441086, + "rewards_train/rejected": -0.2238590270280838, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -171.2254638671875, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -170.49757385253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7225464582443237, + "rewards_train/margins": 2.02721107006073, + "rewards_train/rejected": -3.7497575283050537, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -58.83475112915039, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -6.998214244842529, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008475112728774548, + "rewards_train/margins": 0.03197131399065256, + "rewards_train/rejected": -0.04044642671942711, + "step": 319 + }, + { + "epoch": 0.09, + "logps_train/chosen": -18.463417053222656, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -24.83600425720215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016158295795321465, + "rewards_train/margins": 0.3247587215155363, + "rewards_train/rejected": -0.30860042572021484, + "step": 319 + }, + { + "epoch": 0.09, + "learning_rate": 1.9669171837479868e-06, + "loss": 0.5584, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -114.32954406738281, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -32.30305862426758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6829544305801392, + "rewards_train/margins": -0.40264856815338135, + "rewards_train/rejected": -0.2803058624267578, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -130.65982055664062, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -189.0850830078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8159820437431335, + "rewards_train/margins": -0.00747370719909668, + "rewards_train/rejected": -0.8085083365440369, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -113.93045043945312, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -104.50776672363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5930450558662415, + "rewards_train/margins": 0.20773160457611084, + "rewards_train/rejected": -0.8007766604423523, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -101.61605834960938, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -118.32696533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4616059064865112, + "rewards_train/margins": 0.7210906744003296, + "rewards_train/rejected": -2.182696580886841, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -13.781686782836914, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -20.5925350189209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16566868126392365, + "rewards_train/margins": 0.04358482360839844, + "rewards_train/rejected": -0.20925350487232208, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.767183780670166, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -21.154130935668945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04515662416815758, + "rewards_train/margins": -0.0019302815198898315, + "rewards_train/rejected": 0.04708690568804741, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -70.966552734375, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -113.35198974609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39665529131889343, + "rewards_train/margins": 0.5885436832904816, + "rewards_train/rejected": -0.985198974609375, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -48.36671447753906, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -27.984342575073242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5366714596748352, + "rewards_train/margins": -0.38823719322681427, + "rewards_train/rejected": -0.14843426644802094, + "step": 320 + }, + { + "epoch": 0.09, + "logps_train/chosen": -93.96749114990234, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -149.86874389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.046749114990234375, + "rewards_train/margins": 0.9401252865791321, + "rewards_train/rejected": -0.9868744015693665, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -88.58282470703125, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -95.79975891113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09171753376722336, + "rewards_train/margins": 0.32169342786073685, + "rewards_train/rejected": -0.2299758940935135, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.301226019859314, + "logps_train/ref_chosen": -1.4296875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -5.239496231079102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012846148572862148, + "rewards_train/margins": 0.10554577317088842, + "rewards_train/rejected": -0.09269962459802628, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -26.43266487121582, + "logps_train/ref_chosen": -27.25, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -22.96561050415039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08173351734876633, + "rewards_train/margins": 0.05329456739127636, + "rewards_train/rejected": 0.028438949957489967, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -158.60231018066406, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -139.37582397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7602310180664062, + "rewards_train/margins": 0.47735142707824707, + "rewards_train/rejected": -2.2375824451446533, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -38.160850524902344, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -36.63885498046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24108505249023438, + "rewards_train/margins": -0.12719955295324326, + "rewards_train/rejected": -0.11388549953699112, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -205.5931396484375, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -226.91696166992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.559314250946045, + "rewards_train/margins": 0.43238210678100586, + "rewards_train/rejected": -4.991696357727051, + "step": 321 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.6011600494384766, + "logps_train/ref_chosen": -4.0, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -18.181644439697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03988399729132652, + "rewards_train/margins": 0.19554844126105309, + "rewards_train/rejected": -0.15566444396972656, + "step": 321 + }, + { + "epoch": 0.09, + "learning_rate": 1.966238945747317e-06, + "loss": 0.6149, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -68.95030212402344, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -60.582237243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04503021389245987, + "rewards_train/margins": 0.3881935104727745, + "rewards_train/rejected": -0.4332237243652344, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -192.064453125, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -187.940185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7064453363418579, + "rewards_train/margins": 1.6875733137130737, + "rewards_train/rejected": -2.3940186500549316, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -95.21846008300781, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -41.00798034667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07815399020910263, + "rewards_train/margins": 0.4039520248770714, + "rewards_train/rejected": -0.32579803466796875, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -109.24978637695312, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -158.86483764648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47497865557670593, + "rewards_train/margins": 1.7115052044391632, + "rewards_train/rejected": -2.186483860015869, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -8.695053100585938, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -8.688774108886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013255310244858265, + "rewards_train/margins": 0.06499710213392973, + "rewards_train/rejected": -0.078252412378788, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.000113010406494, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -9.13469123840332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015613699331879616, + "rewards_train/margins": 0.2665828410536051, + "rewards_train/rejected": -0.25096914172172546, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -29.72621726989746, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -37.40805435180664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22737827897071838, + "rewards_train/margins": 0.29318371415138245, + "rewards_train/rejected": -0.06580543518066406, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -93.66548156738281, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -93.59439086914062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11654815822839737, + "rewards_train/margins": -0.007109068334102631, + "rewards_train/rejected": -0.10943908989429474, + "step": 322 + }, + { + "epoch": 0.09, + "logps_train/chosen": -94.4722900390625, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -156.06427001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29722902178764343, + "rewards_train/margins": 1.6091980040073395, + "rewards_train/rejected": -1.906427025794983, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -63.25630187988281, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -119.98298645019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07563018798828125, + "rewards_train/margins": 2.222668409347534, + "rewards_train/rejected": -2.2982985973358154, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -143.63766479492188, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -114.04446411132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.463766485452652, + "rewards_train/margins": -0.959320068359375, + "rewards_train/rejected": 0.495553582906723, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.2359423637390137, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -8.500354766845703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0689067393541336, + "rewards_train/margins": -0.2063712626695633, + "rewards_train/rejected": 0.1374645233154297, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -82.97148132324219, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -83.28710174560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20285187661647797, + "rewards_train/margins": 0.031562045216560364, + "rewards_train/rejected": 0.1712898313999176, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -38.69613265991211, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -44.28056335449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08038673549890518, + "rewards_train/margins": 1.0084430947899818, + "rewards_train/rejected": -0.9280563592910767, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.335434913635254, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -17.909460067749023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02729349210858345, + "rewards_train/margins": -0.06134748458862305, + "rewards_train/rejected": 0.0340539924800396, + "step": 323 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.3600857257843018, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -10.879693031311035, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1297585815191269, + "rewards_train/margins": -0.11678927857428789, + "rewards_train/rejected": -0.012969302944839, + "step": 323 + }, + { + "epoch": 0.09, + "learning_rate": 1.9655539450984722e-06, + "loss": 0.5428, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -9.107017517089844, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -0.291015625, + "logps_train/rejected": -0.14147453010082245, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1982017606496811, + "rewards_train/margins": -0.21315587032586336, + "rewards_train/rejected": 0.01495410967618227, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.7933131456375122, + "logps_train/ref_chosen": -1.4765625, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -6.239959716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0316750667989254, + "rewards_train/margins": -0.026429094839841127, + "rewards_train/rejected": -0.005245971959084272, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -195.74929809570312, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -166.05807495117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4749298095703125, + "rewards_train/margins": 1.1308777332305908, + "rewards_train/rejected": -1.6058075428009033, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -55.72392272949219, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -55.89147186279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04739227518439293, + "rewards_train/margins": 0.016754914075136185, + "rewards_train/rejected": -0.06414718925952911, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -6.426643371582031, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -6.942128658294678, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1364143341779709, + "rewards_train/margins": -0.045326463878154755, + "rewards_train/rejected": -0.09108787029981613, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -94.74790954589844, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -162.5294952392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3747909665107727, + "rewards_train/margins": 2.0781585574150085, + "rewards_train/rejected": -2.4529495239257812, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -27.56154441833496, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -34.11491012573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1061544418334961, + "rewards_train/margins": 0.10533657670021057, + "rewards_train/rejected": -0.21149101853370667, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.02338981628418, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -10.565694808959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02266101911664009, + "rewards_train/margins": 0.2479805089533329, + "rewards_train/rejected": -0.2253194898366928, + "step": 324 + }, + { + "epoch": 0.09, + "logps_train/chosen": -155.50315856933594, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -161.71896362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15031586587429047, + "rewards_train/margins": 1.0215804725885391, + "rewards_train/rejected": -1.1718963384628296, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -8.038592338562012, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -42.425323486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008640766143798828, + "rewards_train/margins": 0.026173114776611328, + "rewards_train/rejected": -0.0175323486328125, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -96.2101821899414, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -151.916259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028981780633330345, + "rewards_train/margins": 0.42060775123536587, + "rewards_train/rejected": -0.3916259706020355, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.861530303955078, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -71.49250793457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1486530303955078, + "rewards_train/margins": 0.5005977749824524, + "rewards_train/rejected": -0.6492508053779602, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -30.01614761352539, + "logps_train/ref_chosen": -29.125, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -79.42784118652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08911476284265518, + "rewards_train/margins": 0.6536693796515465, + "rewards_train/rejected": -0.7427841424942017, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -117.07958984375, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -150.34893798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5579590201377869, + "rewards_train/margins": 0.47693485021591187, + "rewards_train/rejected": -1.0348938703536987, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -89.51353454589844, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -65.43701934814453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20135346055030823, + "rewards_train/margins": -0.15765152499079704, + "rewards_train/rejected": -0.043701935559511185, + "step": 325 + }, + { + "epoch": 0.09, + "logps_train/chosen": -90.83365631103516, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -95.22080993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11663436889648438, + "rewards_train/margins": 0.48871538043022156, + "rewards_train/rejected": -0.3720810115337372, + "step": 325 + }, + { + "epoch": 0.09, + "learning_rate": 1.9648621865957317e-06, + "loss": 0.5402, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -115.79122924804688, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -76.01008605957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0708770751953125, + "rewards_train/margins": 0.171885684132576, + "rewards_train/rejected": -0.10100860893726349, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.4049893617630005, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -3.9917972087860107, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02825106494128704, + "rewards_train/margins": 0.2727432828396559, + "rewards_train/rejected": -0.24449221789836884, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -13.431845664978027, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -24.825881958007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10056543350219727, + "rewards_train/margins": 0.508153647184372, + "rewards_train/rejected": -0.4075882136821747, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.537595272064209, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -5.855772018432617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05375952646136284, + "rewards_train/margins": 0.12869267538189888, + "rewards_train/rejected": -0.18245220184326172, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -73.27777099609375, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -122.55976867675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07777710258960724, + "rewards_train/margins": 1.628199741244316, + "rewards_train/rejected": -1.7059768438339233, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -41.66993713378906, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -17.222026824951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08300628513097763, + "rewards_train/margins": 0.31770897656679153, + "rewards_train/rejected": -0.2347026914358139, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -81.00182342529297, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -31.32347297668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.099817655980587, + "rewards_train/margins": 0.9821649417281151, + "rewards_train/rejected": -0.8823472857475281, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -16.423892974853516, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -1.859375, + "logps_train/rejected": -6.814267635345459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17988930642604828, + "rewards_train/margins": 0.3155999630689621, + "rewards_train/rejected": -0.4954892694950104, + "step": 326 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.972055435180664, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -5.734261512756348, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05591945722699165, + "rewards_train/margins": 0.013720609247684479, + "rewards_train/rejected": 0.042198847979307175, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.134237289428711, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -6.210978031158447, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008451270870864391, + "rewards_train/margins": 0.07017407473176718, + "rewards_train/rejected": -0.061722803860902786, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -8.262674331665039, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -34.05937957763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.054392434656620026, + "rewards_train/margins": 0.3265455290675163, + "rewards_train/rejected": -0.38093796372413635, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -21.305648803710938, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -9.813063621520996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1444351226091385, + "rewards_train/margins": 0.4976164847612381, + "rewards_train/rejected": -0.3531813621520996, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -14.685795783996582, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -3.586972236633301, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.27482959628105164, + "rewards_train/margins": -0.2567573729902506, + "rewards_train/rejected": -0.01807222329080105, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -79.55137634277344, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -125.16089630126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10513763874769211, + "rewards_train/margins": 0.21095199137926102, + "rewards_train/rejected": -0.3160896301269531, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -33.34735107421875, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -50.237152099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08473511040210724, + "rewards_train/margins": 0.11398009955883026, + "rewards_train/rejected": -0.1987152099609375, + "step": 327 + }, + { + "epoch": 0.09, + "logps_train/chosen": -14.690607070922852, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -25.838106155395508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11281070858240128, + "rewards_train/margins": 0.22099991887807846, + "rewards_train/rejected": -0.33381062746047974, + "step": 327 + }, + { + "epoch": 0.09, + "learning_rate": 1.9641636750806712e-06, + "loss": 0.5552, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.222501754760742, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -7.808563709259033, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20975017547607422, + "rewards_train/margins": -0.10389380156993866, + "rewards_train/rejected": -0.10585637390613556, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -16.650781631469727, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -13.805228233337402, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09742183983325958, + "rewards_train/margins": 0.10294466326013207, + "rewards_train/rejected": -0.005522823426872492, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.6857539415359497, + "logps_train/ref_chosen": -1.890625, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -8.21605110168457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020487105473876, + "rewards_train/margins": 0.05146721564233303, + "rewards_train/rejected": -0.03098011016845703, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -6.656763553619385, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -4.864237308502197, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11255135387182236, + "rewards_train/margins": 0.05043487995862961, + "rewards_train/rejected": -0.16298623383045197, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -19.900676727294922, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -37.73417663574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11506767570972443, + "rewards_train/margins": 0.13334998488426208, + "rewards_train/rejected": -0.2484176605939865, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.5797600746154785, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -27.882061004638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05139899253845215, + "rewards_train/margins": 0.2396050989627838, + "rewards_train/rejected": -0.18820610642433167, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -74.71588897705078, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -115.42838287353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.821588933467865, + "rewards_train/margins": 0.5712494254112244, + "rewards_train/rejected": -1.3928383588790894, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -2.4529550075531006, + "logps_train/ref_chosen": -0.224609375, + "logps_train/ref_rejected": -0.224609375, + "logps_train/rejected": -2.3482143878936768, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22283457219600677, + "rewards_train/margins": -0.010474070906639099, + "rewards_train/rejected": -0.21236050128936768, + "step": 328 + }, + { + "epoch": 0.09, + "logps_train/chosen": -165.93624877929688, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -136.07814025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5936248898506165, + "rewards_train/margins": 0.11418914794921875, + "rewards_train/rejected": -0.7078140377998352, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.775162696838379, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -14.213428497314453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04626626893877983, + "rewards_train/margins": -0.06867341883480549, + "rewards_train/rejected": 0.022407149896025658, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -184.8240203857422, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -141.14077758789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5824020504951477, + "rewards_train/margins": 1.631675660610199, + "rewards_train/rejected": -2.2140777111053467, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -111.424072265625, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -110.97962188720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.1575927734375, + "rewards_train/margins": -0.044445037841796875, + "rewards_train/rejected": 0.20203781127929688, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -98.02946472167969, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -125.19252014160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40294647216796875, + "rewards_train/margins": 2.316305637359619, + "rewards_train/rejected": -2.719252109527588, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.686527729034424, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -10.912818908691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028222227469086647, + "rewards_train/margins": 0.1632541213184595, + "rewards_train/rejected": -0.13503189384937286, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.843106269836426, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -2.798135280609131, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006185627076774836, + "rewards_train/margins": 0.02675290172919631, + "rewards_train/rejected": -0.032938528805971146, + "step": 329 + }, + { + "epoch": 0.09, + "logps_train/chosen": -70.47604370117188, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -188.07040405273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04760437086224556, + "rewards_train/margins": 3.1594361774623394, + "rewards_train/rejected": -3.207040548324585, + "step": 329 + }, + { + "epoch": 0.09, + "learning_rate": 1.9634584154421312e-06, + "loss": 0.5481, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -117.33079528808594, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -150.41217041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3330795764923096, + "rewards_train/margins": 0.5081374645233154, + "rewards_train/rejected": -1.841217041015625, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.2095947265625, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -31.050189971923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04595947265625, + "rewards_train/margins": 0.44655951857566833, + "rewards_train/rejected": -0.49251899123191833, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -6.687427043914795, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -8.185909271240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04374270513653755, + "rewards_train/margins": 0.14984821900725365, + "rewards_train/rejected": -0.1935909241437912, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -102.73985290527344, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -121.12782287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6239852905273438, + "rewards_train/margins": 1.4887969493865967, + "rewards_train/rejected": -2.1127822399139404, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -82.45960998535156, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -11.579459190368652, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.245960995554924, + "rewards_train/margins": 0.018234923481941223, + "rewards_train/rejected": -0.26419591903686523, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -72.3868408203125, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -74.66909790039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2613159120082855, + "rewards_train/margins": 0.07822570204734802, + "rewards_train/rejected": 0.1830902099609375, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -14.97573471069336, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -13.637603759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04617653042078018, + "rewards_train/margins": 0.09743690863251686, + "rewards_train/rejected": -0.05126037821173668, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.750741720199585, + "logps_train/ref_chosen": -1.875, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -10.205305099487305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012425827793776989, + "rewards_train/margins": 0.2673313496634364, + "rewards_train/rejected": -0.2549055218696594, + "step": 330 + }, + { + "epoch": 0.09, + "logps_train/chosen": -74.28730773925781, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -98.82830810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22873078286647797, + "rewards_train/margins": 0.6041000634431839, + "rewards_train/rejected": -0.8328308463096619, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -27.67245101928711, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -12.82398509979248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10474510490894318, + "rewards_train/margins": 0.6807784289121628, + "rewards_train/rejected": -0.785523533821106, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -10.895045280456543, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -18.464818954467773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13549546897411346, + "rewards_train/margins": 0.15697736479341984, + "rewards_train/rejected": -0.021481895819306374, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -50.85595703125, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -34.3873176574707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13559570908546448, + "rewards_train/margins": 0.05313606560230255, + "rewards_train/rejected": -0.18873177468776703, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.9367289543151855, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -8.148123741149902, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017264604568481445, + "rewards_train/margins": 0.22582697868347168, + "rewards_train/rejected": -0.20856237411499023, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -70.17491912841797, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -105.27625274658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28250810503959656, + "rewards_train/margins": 0.2101333811879158, + "rewards_train/rejected": 0.07237472385168076, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -50.074378967285156, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -104.09095001220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.032437898218631744, + "rewards_train/margins": 0.17665710300207138, + "rewards_train/rejected": -0.20909500122070312, + "step": 331 + }, + { + "epoch": 0.09, + "logps_train/chosen": -21.767436981201172, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -22.627445220947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023256301879882812, + "rewards_train/margins": 0.2235008329153061, + "rewards_train/rejected": -0.20024453103542328, + "step": 331 + }, + { + "epoch": 0.09, + "learning_rate": 1.9627464126161814e-06, + "loss": 0.5534, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.770538806915283, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -8.29581069946289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013571119867265224, + "rewards_train/margins": 0.10252719279378653, + "rewards_train/rejected": -0.0889560729265213, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -6.073958396911621, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -11.188343048095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04572916030883789, + "rewards_train/margins": 0.25831346213817596, + "rewards_train/rejected": -0.21258430182933807, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -115.7550048828125, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -129.96514892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3755005598068237, + "rewards_train/margins": 1.3710144758224487, + "rewards_train/rejected": -2.7465150356292725, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -92.8134765625, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -79.1533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01865234412252903, + "rewards_train/margins": 0.9339843634516001, + "rewards_train/rejected": -0.915332019329071, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -0.8193093538284302, + "logps_train/ref_chosen": -0.66796875, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -12.316696166992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015134060755372047, + "rewards_train/margins": -0.027214444242417812, + "rewards_train/rejected": 0.012080383487045765, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -2.2134597301483154, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -2.171875, + "logps_train/rejected": -1.9533507823944092, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011970973573625088, + "rewards_train/margins": -0.03382339607924223, + "rewards_train/rejected": 0.02185242250561714, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -108.99148559570312, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -151.70140075683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0008514404180459678, + "rewards_train/margins": 0.2709915220621042, + "rewards_train/rejected": -0.2701400816440582, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -122.65748596191406, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -169.56265258789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16574859619140625, + "rewards_train/margins": 2.890516757965088, + "rewards_train/rejected": -3.056265354156494, + "step": 332 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.680959224700928, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -16.656984329223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04309592396020889, + "rewards_train/margins": 0.21010252088308334, + "rewards_train/rejected": -0.25319844484329224, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -17.961454391479492, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -25.836071014404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16635456681251526, + "rewards_train/margins": 0.8249616920948029, + "rewards_train/rejected": -0.6586071252822876, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.424433708190918, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -6.732122421264648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06588087230920792, + "rewards_train/margins": 0.010456368327140808, + "rewards_train/rejected": -0.07633724063634872, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -140.5899658203125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -98.1387939453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.258996605873108, + "rewards_train/margins": -0.7451171875, + "rewards_train/rejected": -0.5138794183731079, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -129.78585815429688, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -133.9620819091797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17858581244945526, + "rewards_train/margins": -0.1823776215314865, + "rewards_train/rejected": 0.00379180908203125, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -82.97098541259766, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -76.00294494628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.047098543494939804, + "rewards_train/margins": 1.0031959749758244, + "rewards_train/rejected": -1.0502945184707642, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.418052673339844, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -7.407807350158691, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029305268079042435, + "rewards_train/margins": 0.027100466191768646, + "rewards_train/rejected": -0.05640573427081108, + "step": 333 + }, + { + "epoch": 0.09, + "logps_train/chosen": -142.85850524902344, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -155.03160095214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2858505249023438, + "rewards_train/margins": -0.9826904535293579, + "rewards_train/rejected": -1.3031600713729858, + "step": 333 + }, + { + "epoch": 0.09, + "learning_rate": 1.962027671586086e-06, + "loss": 0.6044, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -85.46382141113281, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -155.81234741210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9463821649551392, + "rewards_train/margins": 2.48485267162323, + "rewards_train/rejected": -3.431234836578369, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -1.0927739143371582, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -14.09835147857666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0391601100564003, + "rewards_train/margins": 0.21774526685476303, + "rewards_train/rejected": -0.17858515679836273, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -0.4926537573337555, + "logps_train/ref_chosen": -0.458984375, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -7.2660017013549805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0033669383265078068, + "rewards_train/margins": 0.132608228828758, + "rewards_train/rejected": -0.1359751671552658, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -74.7872085571289, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -158.55111694335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1712791472673416, + "rewards_train/margins": 4.826390936970711, + "rewards_train/rejected": -4.655111789703369, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -80.72908782958984, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -158.0496368408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0229088068008423, + "rewards_train/margins": 2.2820550203323364, + "rewards_train/rejected": -3.3049638271331787, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -93.8763656616211, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -94.73486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0876365676522255, + "rewards_train/margins": 0.5858497843146324, + "rewards_train/rejected": -0.6734863519668579, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -99.73753356933594, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -147.38262939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0737533569335938, + "rewards_train/margins": 2.314509630203247, + "rewards_train/rejected": -3.388262987136841, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -149.7269287109375, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -205.75521850585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.772692859172821, + "rewards_train/margins": 3.302829086780548, + "rewards_train/rejected": -4.075521945953369, + "step": 334 + }, + { + "epoch": 0.09, + "logps_train/chosen": -64.21311950683594, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -107.38194274902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.228688046336174, + "rewards_train/margins": 1.4668823927640915, + "rewards_train/rejected": -1.2381943464279175, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -108.5135269165039, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -164.15518188476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2013527154922485, + "rewards_train/margins": 2.51416552066803, + "rewards_train/rejected": -3.7155182361602783, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -120.55535125732422, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -112.36370849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34446486830711365, + "rewards_train/margins": 1.3308357298374176, + "rewards_train/rejected": -0.986370861530304, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -116.75340270996094, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -145.12684631347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5753402709960938, + "rewards_train/margins": 3.737344264984131, + "rewards_train/rejected": -4.312684535980225, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.339888572692871, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -9.920093536376953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15430136024951935, + "rewards_train/margins": -0.1935420073568821, + "rewards_train/rejected": 0.03924064710736275, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -6.707235813140869, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -8.305458068847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08865141868591309, + "rewards_train/margins": 0.2535722255706787, + "rewards_train/rejected": -0.16492080688476562, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -80.37073516845703, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -147.20849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6370735168457031, + "rewards_train/margins": 1.9837760925292969, + "rewards_train/rejected": -2.620849609375, + "step": 335 + }, + { + "epoch": 0.09, + "logps_train/chosen": -3.4120032787323, + "logps_train/ref_chosen": -3.46875, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -11.0057954788208, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0056746723130345345, + "rewards_train/margins": 0.16875421721488237, + "rewards_train/rejected": -0.16307954490184784, + "step": 335 + }, + { + "epoch": 0.09, + "learning_rate": 1.96130219738227e-06, + "loss": 0.2893, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.771712303161621, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -95.81647491455078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1459212303161621, + "rewards_train/margins": -0.06427373737096786, + "rewards_train/rejected": -0.08164749294519424, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -58.50859832763672, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -74.22804260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.124140165746212, + "rewards_train/margins": 0.7469444498419762, + "rewards_train/rejected": -0.6228042840957642, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -207.2191925048828, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -209.31919860839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.421919345855713, + "rewards_train/margins": -0.28999948501586914, + "rewards_train/rejected": -4.131919860839844, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -110.30428314208984, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -146.95208740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3304283320903778, + "rewards_train/margins": 2.164780408143997, + "rewards_train/rejected": -2.495208740234375, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -116.49153137207031, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -74.27202606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04915313795208931, + "rewards_train/margins": 0.1780494712293148, + "rewards_train/rejected": -0.2272026091814041, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.002338409423828, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -9.420660972595215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09039115905761719, + "rewards_train/margins": 0.43870726227760315, + "rewards_train/rejected": -0.34831610321998596, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.85922622680664, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -13.613303184509277, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.020327378064393997, + "rewards_train/margins": -0.05584230646491051, + "rewards_train/rejected": 0.0761696845293045, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -154.8765869140625, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -118.5595474243164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6376588344573975, + "rewards_train/margins": -0.931704044342041, + "rewards_train/rejected": -2.7059547901153564, + "step": 336 + }, + { + "epoch": 0.09, + "logps_train/chosen": -99.61654663085938, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -164.56265258789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1116546392440796, + "rewards_train/margins": 4.344610810279846, + "rewards_train/rejected": -5.456265449523926, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -0.17669740319252014, + "logps_train/ref_chosen": -0.22265625, + "logps_train/ref_rejected": -0.22265625, + "logps_train/rejected": -0.18624316155910492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004595884587615728, + "rewards_train/margins": 0.0009545756038278341, + "rewards_train/rejected": 0.0036413089837878942, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -72.90017700195312, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -23.38710594177246, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7400177121162415, + "rewards_train/margins": -0.5763071179389954, + "rewards_train/rejected": -0.1637105941772461, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -150.28005981445312, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -169.67120361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6280059814453125, + "rewards_train/margins": 1.5391144752502441, + "rewards_train/rejected": -2.1671204566955566, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -14.824065208435059, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -15.322369575500488, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05115652084350586, + "rewards_train/margins": -0.09391956403851509, + "rewards_train/rejected": 0.04276304319500923, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -138.33892822265625, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -156.76052856445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.983892798423767, + "rewards_train/margins": 0.09216010570526123, + "rewards_train/rejected": -2.0760529041290283, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -4.9502973556518555, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -4.951178073883057, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13252973556518555, + "rewards_train/margins": -0.0749119259417057, + "rewards_train/rejected": -0.05761780962347984, + "step": 337 + }, + { + "epoch": 0.09, + "logps_train/chosen": -113.24015808105469, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -14.679046630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.175984188914299, + "rewards_train/margins": 0.318888857960701, + "rewards_train/rejected": -0.14290466904640198, + "step": 337 + }, + { + "epoch": 0.09, + "learning_rate": 1.960569995082285e-06, + "loss": 0.6082, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -93.29367065429688, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -150.03004455566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.329367071390152, + "rewards_train/margins": 2.2736373841762543, + "rewards_train/rejected": -2.6030044555664062, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -5.527463912963867, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -13.820536613464355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03712139278650284, + "rewards_train/margins": 0.25118228048086166, + "rewards_train/rejected": -0.2883036732673645, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -15.716146469116211, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -7.538365840911865, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0783853530883789, + "rewards_train/margins": 0.23847194015979767, + "rewards_train/rejected": -0.16008658707141876, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -25.804954528808594, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -34.2791748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13049545884132385, + "rewards_train/margins": 0.07242202758789062, + "rewards_train/rejected": -0.20291748642921448, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -15.84750747680664, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -14.677027702331543, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008999252691864967, + "rewards_train/margins": -0.035797977820038795, + "rewards_train/rejected": 0.04479723051190376, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -8.481632232666016, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -78.86835479736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11066322773694992, + "rewards_train/margins": 0.17617226392030716, + "rewards_train/rejected": -0.2868354916572571, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -146.5831298828125, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -107.7161865234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.008312940597534, + "rewards_train/margins": -0.5366942882537842, + "rewards_train/rejected": -2.47161865234375, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -83.91299438476562, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -166.77578735351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6912994384765625, + "rewards_train/margins": 4.536279201507568, + "rewards_train/rejected": -5.227578639984131, + "step": 338 + }, + { + "epoch": 0.09, + "logps_train/chosen": -8.861040115356445, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -15.633295059204102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02985401265323162, + "rewards_train/margins": 0.13972549326717854, + "rewards_train/rejected": -0.16957950592041016, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -0.11330164968967438, + "logps_train/ref_chosen": -0.193359375, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -4.873637676239014, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008005772717297077, + "rewards_train/margins": 0.010994540294632316, + "rewards_train/rejected": -0.0029887675773352385, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -109.47355651855469, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -112.36373901367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29735565185546875, + "rewards_train/margins": 1.3890182971954346, + "rewards_train/rejected": -1.6863739490509033, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -140.22048950195312, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -144.20855712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7220489382743835, + "rewards_train/margins": 2.398806869983673, + "rewards_train/rejected": -3.1208558082580566, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -112.97492980957031, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -167.76300048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.152507022023201, + "rewards_train/margins": 0.1288070697337389, + "rewards_train/rejected": 0.02369995228946209, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -162.05886840820312, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -136.00514221191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2058868408203125, + "rewards_train/margins": -0.5053726434707642, + "rewards_train/rejected": -1.7005141973495483, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -9.832197189331055, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -4.645158767700195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07303028553724289, + "rewards_train/margins": 0.24692115932703018, + "rewards_train/rejected": -0.1738908737897873, + "step": 339 + }, + { + "epoch": 0.09, + "logps_train/chosen": -11.952192306518555, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -8.43689250946045, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08896923065185547, + "rewards_train/margins": -0.11402997933328152, + "rewards_train/rejected": 0.02506074868142605, + "step": 339 + }, + { + "epoch": 0.1, + "learning_rate": 1.95983106981077e-06, + "loss": 0.5497, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -18.26441764831543, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -4.225292205810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14855824410915375, + "rewards_train/margins": 0.14921246469020844, + "rewards_train/rejected": -0.0006542205810546875, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -72.53701782226562, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -128.74755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05370178446173668, + "rewards_train/margins": 0.9710540510714054, + "rewards_train/rejected": -1.024755835533142, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.882055282592773, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -8.977173805236816, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0475805290043354, + "rewards_train/margins": 0.33138684555888176, + "rewards_train/rejected": -0.37896737456321716, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -17.768320083618164, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -43.033111572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026832008734345436, + "rewards_train/margins": 0.6014791484922171, + "rewards_train/rejected": -0.6283111572265625, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -13.06075382232666, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -4.3543548583984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0935753807425499, + "rewards_train/margins": -0.05188989266753197, + "rewards_train/rejected": -0.04168548807501793, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -63.50663375854492, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -45.46551513671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7006633877754211, + "rewards_train/margins": -0.5791118741035461, + "rewards_train/rejected": -0.121551513671875, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -118.09030151367188, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -175.83676147460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9590301513671875, + "rewards_train/margins": 3.3746461868286133, + "rewards_train/rejected": -5.333676338195801, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.1488494873046875, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -29.765108108520508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02113494835793972, + "rewards_train/margins": 0.36787586845457554, + "rewards_train/rejected": -0.38901081681251526, + "step": 340 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.4224443435668945, + "logps_train/ref_chosen": -0.6484375, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -23.95598602294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07740068435668945, + "rewards_train/margins": 0.16819791495800018, + "rewards_train/rejected": -0.24559859931468964, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -84.92060852050781, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -98.77286529541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8920608758926392, + "rewards_train/margins": 0.7352256774902344, + "rewards_train/rejected": -1.6272865533828735, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -105.42463684082031, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -88.14393615722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09246368706226349, + "rewards_train/margins": 0.47192995250225067, + "rewards_train/rejected": -0.5643936395645142, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -46.657470703125, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -99.6282730102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34074708819389343, + "rewards_train/margins": 0.5220802128314972, + "rewards_train/rejected": -0.8628273010253906, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -95.72712707519531, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -154.38803100585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.072712779045105, + "rewards_train/margins": 1.1660903692245483, + "rewards_train/rejected": -2.2388031482696533, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -23.24574851989746, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -114.51165771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012074852362275124, + "rewards_train/margins": 0.23909092508256435, + "rewards_train/rejected": -0.2511657774448395, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -156.3946533203125, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -154.5941619873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.63946533203125, + "rewards_train/margins": 0.6199510097503662, + "rewards_train/rejected": -2.259416341781616, + "step": 341 + }, + { + "epoch": 0.1, + "logps_train/chosen": -112.70880126953125, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -97.51105499267578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4708801209926605, + "rewards_train/margins": -0.01977461576461792, + "rewards_train/rejected": -0.4511055052280426, + "step": 341 + }, + { + "epoch": 0.1, + "learning_rate": 1.9590854267394186e-06, + "loss": 0.5109, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -16.409387588500977, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -11.202880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12156124413013458, + "rewards_train/margins": 0.37309934198856354, + "rewards_train/rejected": -0.25153809785842896, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -13.557012557983398, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -1.140625, + "logps_train/rejected": -1.0812925100326538, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038048744201660156, + "rewards_train/margins": 0.032115495298057795, + "rewards_train/rejected": 0.005933248903602362, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -50.695411682128906, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -47.08898162841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15545883774757385, + "rewards_train/margins": 0.21435699984431267, + "rewards_train/rejected": -0.058898162096738815, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.3070927858352661, + "logps_train/ref_chosen": -1.09375, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -0.8587039113044739, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02133427932858467, + "rewards_train/margins": -0.0573388896882534, + "rewards_train/rejected": 0.03600461035966873, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -137.03147888183594, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -139.31298828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.053148031234741, + "rewards_train/margins": -0.5218491554260254, + "rewards_train/rejected": -1.5312988758087158, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -29.106109619140625, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -1.8125, + "logps_train/rejected": -6.5089826583862305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.273110955953598, + "rewards_train/margins": 0.1965373158454895, + "rewards_train/rejected": -0.4696482717990875, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -19.577186584472656, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -58.12238693237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020218659192323685, + "rewards_train/margins": 0.19202004000544548, + "rewards_train/rejected": -0.21223869919776917, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.386334419250488, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -28.185266494750977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08636655658483505, + "rewards_train/margins": 0.07989320578053594, + "rewards_train/rejected": 0.006473350804299116, + "step": 342 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.718196392059326, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -1.25, + "logps_train/rejected": -1.8612761497497559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006305360700935125, + "rewards_train/margins": 0.06743297493085265, + "rewards_train/rejected": -0.061127614229917526, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.295895576477051, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -5.399484634399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007910442538559437, + "rewards_train/margins": 0.04473390523344278, + "rewards_train/rejected": -0.03682346269488335, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.855628967285156, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -7.553763389587402, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1136878952383995, + "rewards_train/margins": -0.05831155553460121, + "rewards_train/rejected": -0.055376339703798294, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.1436833143234253, + "logps_train/ref_chosen": -0.251953125, + "logps_train/ref_rejected": -0.251953125, + "logps_train/rejected": -0.14314396679401398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.010826981626451015, + "rewards_train/margins": -5.393475294113159e-05, + "rewards_train/rejected": 0.010880916379392147, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -21.404882431030273, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -26.191404342651367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11548824608325958, + "rewards_train/margins": -0.2463478147983551, + "rewards_train/rejected": 0.13085956871509552, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.064845323562622, + "logps_train/ref_chosen": -2.5625, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -3.9009084701538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.050234533846378326, + "rewards_train/margins": 0.04610631614923477, + "rewards_train/rejected": -0.0963408499956131, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -54.854164123535156, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -60.035850524902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.035416413098573685, + "rewards_train/margins": 0.09316864237189293, + "rewards_train/rejected": -0.1285850554704666, + "step": 343 + }, + { + "epoch": 0.1, + "logps_train/chosen": -86.99562072753906, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -99.51065063476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44956207275390625, + "rewards_train/margins": 0.7015030384063721, + "rewards_train/rejected": -1.1510651111602783, + "step": 343 + }, + { + "epoch": 0.1, + "learning_rate": 1.958333071086941e-06, + "loss": 0.6654, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -161.29090881347656, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -212.0, + "logps_train/rejected": -222.557861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7290908694267273, + "rewards_train/margins": 0.3266952633857727, + "rewards_train/rejected": -1.0557861328125, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -184.41677856445312, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -144.54440307617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6416778564453125, + "rewards_train/margins": 0.112762451171875, + "rewards_train/rejected": -2.7544403076171875, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -194.4336395263672, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -202.6908416748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5433639883995056, + "rewards_train/margins": 2.4257202744483948, + "rewards_train/rejected": -2.9690842628479004, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -11.118043899536133, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -6.49425745010376, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2805543839931488, + "rewards_train/margins": -0.06862863898277283, + "rewards_train/rejected": -0.21192574501037598, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -17.172624588012695, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -24.685638427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004762459080666304, + "rewards_train/margins": 0.1013013836927712, + "rewards_train/rejected": -0.1060638427734375, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -79.10492706298828, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -100.38027954101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21049271523952484, + "rewards_train/margins": 0.5775352269411087, + "rewards_train/rejected": -0.7880279421806335, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.183556079864502, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -1.953125, + "logps_train/rejected": -1.8069056272506714, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.048043109476566315, + "rewards_train/margins": -0.0626650471240282, + "rewards_train/rejected": 0.014621937647461891, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -147.84774780273438, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -163.14999389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1847747564315796, + "rewards_train/margins": 0.13022470474243164, + "rewards_train/rejected": -1.3149994611740112, + "step": 344 + }, + { + "epoch": 0.1, + "logps_train/chosen": -76.49560546875, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -130.6147918701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.049560546875, + "rewards_train/margins": 1.9119186401367188, + "rewards_train/rejected": -1.9614791870117188, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -89.32192993164062, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -128.8111572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7821930050849915, + "rewards_train/margins": 1.0989227890968323, + "rewards_train/rejected": -1.8811157941818237, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -94.61510467529297, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -129.45980834960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9615104794502258, + "rewards_train/margins": 2.234470307826996, + "rewards_train/rejected": -3.1959807872772217, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -77.75497436523438, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -77.47140502929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.07450256496667862, + "rewards_train/margins": -0.02835693210363388, + "rewards_train/rejected": 0.1028594970703125, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -59.549373626708984, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -48.707332611083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12993736565113068, + "rewards_train/margins": 0.4157959073781967, + "rewards_train/rejected": -0.5457332730293274, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -76.42621612548828, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -77.9542236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09262161701917648, + "rewards_train/margins": 0.35280076414346695, + "rewards_train/rejected": -0.44542238116264343, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.200372695922852, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -7.160017490386963, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12628726661205292, + "rewards_train/margins": -0.1321605178527534, + "rewards_train/rejected": 0.005873251240700483, + "step": 345 + }, + { + "epoch": 0.1, + "logps_train/chosen": -96.21128845214844, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -175.42697143554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7211288809776306, + "rewards_train/margins": 2.5215683579444885, + "rewards_train/rejected": -3.242697238922119, + "step": 345 + }, + { + "epoch": 0.1, + "learning_rate": 1.95757400811903e-06, + "loss": 0.4719, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -100.12127685546875, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -176.16616821289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8121277093887329, + "rewards_train/margins": 1.2044891119003296, + "rewards_train/rejected": -2.0166168212890625, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.773221492767334, + "logps_train/ref_chosen": -1.4375, + "logps_train/ref_rejected": -2.140625, + "logps_train/rejected": -3.0721940994262695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03357214853167534, + "rewards_train/margins": 0.059584762901067734, + "rewards_train/rejected": -0.09315691143274307, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -117.24179077148438, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -166.36932373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5241791009902954, + "rewards_train/margins": 2.3127533197402954, + "rewards_train/rejected": -3.836932420730591, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -119.18252563476562, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -122.02035522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26825258135795593, + "rewards_train/margins": 0.7837829887866974, + "rewards_train/rejected": -1.0520355701446533, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -126.95427703857422, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -138.35690307617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6954277753829956, + "rewards_train/margins": -0.3097374439239502, + "rewards_train/rejected": -1.3856903314590454, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -16.530010223388672, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -7.136499881744385, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1344989836215973, + "rewards_train/margins": 0.6325239837169647, + "rewards_train/rejected": -0.49802500009536743, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -8.904046058654785, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -5.577927112579346, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.027904605492949486, + "rewards_train/margins": 0.07676311023533344, + "rewards_train/rejected": -0.10466771572828293, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -117.46119689941406, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -142.7659912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29611968994140625, + "rewards_train/margins": 0.5804794430732727, + "rewards_train/rejected": -0.876599133014679, + "step": 346 + }, + { + "epoch": 0.1, + "logps_train/chosen": -16.76891326904297, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -3.329298973083496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014391327276825905, + "rewards_train/margins": 0.08260106854140759, + "rewards_train/rejected": -0.09699239581823349, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.9695770740509033, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -7.637503623962402, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07195770740509033, + "rewards_train/margins": -0.10195734538137913, + "rewards_train/rejected": 0.029999637976288795, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -152.6131134033203, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -184.6431884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.761311411857605, + "rewards_train/margins": 2.9030076265335083, + "rewards_train/rejected": -4.664319038391113, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -88.2135238647461, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -89.76287841796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2713524103164673, + "rewards_train/margins": -0.2950645685195923, + "rewards_train/rejected": -0.976287841796875, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -13.530003547668457, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -22.994836807250977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18449965119361877, + "rewards_train/margins": 0.20898333191871643, + "rewards_train/rejected": -0.024483680725097656, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -77.35335540771484, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -102.01278686523438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3853355348110199, + "rewards_train/margins": -0.1840568482875824, + "rewards_train/rejected": -0.2012786865234375, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -12.013668060302734, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -1.5703125, + "logps_train/rejected": -1.8475857973098755, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1798831969499588, + "rewards_train/margins": 0.20761052705347538, + "rewards_train/rejected": -0.02772733010351658, + "step": 347 + }, + { + "epoch": 0.1, + "logps_train/chosen": -27.973968505859375, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -14.464618682861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08489685505628586, + "rewards_train/margins": 0.10531502217054367, + "rewards_train/rejected": -0.19021187722682953, + "step": 347 + }, + { + "epoch": 0.1, + "learning_rate": 1.956808243148321e-06, + "loss": 0.5441, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -71.00414276123047, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -27.739105224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.050414275377988815, + "rewards_train/margins": 0.14849624410271645, + "rewards_train/rejected": -0.19891051948070526, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.922758102416992, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -14.89721965789795, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0422758124768734, + "rewards_train/margins": 0.116196159273386, + "rewards_train/rejected": -0.1584719717502594, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.7747282981872559, + "logps_train/ref_chosen": -1.90625, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -2.826375961303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0131521699950099, + "rewards_train/margins": 0.08016477059572935, + "rewards_train/rejected": -0.06701260060071945, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -89.23548889160156, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -143.81211853027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17645111680030823, + "rewards_train/margins": 2.907663017511368, + "rewards_train/rejected": -2.7312119007110596, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -59.29590606689453, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -71.67262268066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24540939927101135, + "rewards_train/margins": 0.06267166137695312, + "rewards_train/rejected": 0.18273773789405823, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -12.362475395202637, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -18.829431533813477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08624754101037979, + "rewards_train/margins": 0.09669560939073563, + "rewards_train/rejected": -0.18294315040111542, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -121.710205078125, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -176.97891235351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.621020495891571, + "rewards_train/margins": 2.676870882511139, + "rewards_train/rejected": -3.29789137840271, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.250504493713379, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -40.37261199951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07505045086145401, + "rewards_train/margins": 0.5872107371687889, + "rewards_train/rejected": -0.6622611880302429, + "step": 348 + }, + { + "epoch": 0.1, + "logps_train/chosen": -8.014404296875, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -26.192089080810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02980956993997097, + "rewards_train/margins": 0.4615184720605612, + "rewards_train/rejected": -0.4317089021205902, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -24.14811134338379, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -12.45552921295166, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.40231114625930786, + "rewards_train/margins": -0.2567582279443741, + "rewards_train/rejected": -0.14555291831493378, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -98.58336639404297, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -151.8326416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4083366394042969, + "rewards_train/margins": 2.4249274730682373, + "rewards_train/rejected": -2.833264112472534, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.18932709097862244, + "logps_train/ref_chosen": -0.19921875, + "logps_train/ref_rejected": -0.19921875, + "logps_train/rejected": -0.18557587265968323, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0009891659719869494, + "rewards_train/margins": -0.0003751218318939209, + "rewards_train/rejected": 0.0013642878038808703, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -22.492908477783203, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -35.55576705932617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1507091522216797, + "rewards_train/margins": 0.8312858939170837, + "rewards_train/rejected": -0.680576741695404, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -198.93325805664062, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -206.97799682617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9933258295059204, + "rewards_train/margins": 0.5044739246368408, + "rewards_train/rejected": -1.4977997541427612, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -43.17871856689453, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -52.314613342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3571281433105469, + "rewards_train/margins": 0.11358948051929474, + "rewards_train/rejected": 0.24353866279125214, + "step": 349 + }, + { + "epoch": 0.1, + "logps_train/chosen": -122.15892028808594, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -209.3836669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7158920764923096, + "rewards_train/margins": 1.2224745750427246, + "rewards_train/rejected": -2.938366651535034, + "step": 349 + }, + { + "epoch": 0.1, + "learning_rate": 1.9560357815343576e-06, + "loss": 0.4756, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -202.19601440429688, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -183.64833068847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9196014404296875, + "rewards_train/margins": -0.9547684192657471, + "rewards_train/rejected": -2.9648330211639404, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -75.16197967529297, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -69.12419128417969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6661979556083679, + "rewards_train/margins": -0.10377883911132812, + "rewards_train/rejected": -0.5624191164970398, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -42.24876403808594, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -34.556861877441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07512360066175461, + "rewards_train/margins": 0.005809783935546875, + "rewards_train/rejected": 0.06931381672620773, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.713521957397461, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -23.381759643554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009897804819047451, + "rewards_train/margins": 0.18557377811521292, + "rewards_train/rejected": -0.17567597329616547, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.222891807556152, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -12.143038749694824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01521081943064928, + "rewards_train/margins": 0.3045146884396672, + "rewards_train/rejected": -0.28930386900901794, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -20.388355255126953, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -20.114639282226562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2888355255126953, + "rewards_train/margins": -0.10237158834934235, + "rewards_train/rejected": -0.18646393716335297, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -87.96124267578125, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -117.02384185791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20387573540210724, + "rewards_train/margins": 1.9562599211931229, + "rewards_train/rejected": -1.7523841857910156, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.691340446472168, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -5.072408199310303, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15586595237255096, + "rewards_train/margins": 0.069356769323349, + "rewards_train/rejected": 0.08650918304920197, + "step": 350 + }, + { + "epoch": 0.1, + "logps_train/chosen": -71.47496032714844, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -153.26522827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5974960327148438, + "rewards_train/margins": 2.9790267944335938, + "rewards_train/rejected": -3.5765228271484375, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -139.24185180664062, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -81.59964752197266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5241851806640625, + "rewards_train/margins": -1.1642204225063324, + "rewards_train/rejected": -0.3599647581577301, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.538854598999023, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -6.275223731994629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15388546884059906, + "rewards_train/margins": 0.06426191329956055, + "rewards_train/rejected": -0.2181473821401596, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -18.59687042236328, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -17.343826293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0028129578568041325, + "rewards_train/margins": 0.47469559917226434, + "rewards_train/rejected": -0.4718826413154602, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -104.28439331054688, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -132.26454162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7284393310546875, + "rewards_train/margins": 2.0480148792266846, + "rewards_train/rejected": -2.776454210281372, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -17.416725158691406, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -18.96393394470215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11667251586914062, + "rewards_train/margins": 0.44222086668014526, + "rewards_train/rejected": -0.5588933825492859, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -7.596909046173096, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -9.444706916809082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09655909985303879, + "rewards_train/margins": 0.178529791533947, + "rewards_train/rejected": -0.0819706916809082, + "step": 351 + }, + { + "epoch": 0.1, + "logps_train/chosen": -12.912309646606445, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -5.6234540939331055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03498096391558647, + "rewards_train/margins": -0.0851355567574501, + "rewards_train/rejected": 0.05015459284186363, + "step": 351 + }, + { + "epoch": 0.1, + "learning_rate": 1.9552566286835513e-06, + "loss": 0.6252, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -116.27900695800781, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -153.37744140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1779006719589233, + "rewards_train/margins": -0.7401565313339233, + "rewards_train/rejected": -0.437744140625, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -98.12351989746094, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -126.40564727783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1123520135879517, + "rewards_train/margins": 0.7782127857208252, + "rewards_train/rejected": -1.8905647993087769, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -11.907032012939453, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -11.832813262939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06570320576429367, + "rewards_train/margins": 0.005078122019767761, + "rewards_train/rejected": -0.07078132778406143, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.740985870361328, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -16.846731185913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019651412963867188, + "rewards_train/margins": 0.2855745255947113, + "rewards_train/rejected": -0.2659231126308441, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -11.055246353149414, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -9.12580680847168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23052464425563812, + "rewards_train/margins": 0.12268103659152985, + "rewards_train/rejected": -0.35320568084716797, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.949693202972412, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -15.852492332458496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02690568007528782, + "rewards_train/margins": 0.2684049103409052, + "rewards_train/rejected": -0.24149923026561737, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -93.65282440185547, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -147.15882873535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.034717559814453125, + "rewards_train/margins": 1.7506004571914673, + "rewards_train/rejected": -1.7158828973770142, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -7.955413818359375, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -14.604726791381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03616638109087944, + "rewards_train/margins": 0.3305562920868397, + "rewards_train/rejected": -0.3667226731777191, + "step": 352 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.6216143369674683, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -3.171466827392578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005911433603614569, + "rewards_train/margins": -0.009077250957489014, + "rewards_train/rejected": 0.003165817353874445, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -185.02450561523438, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -202.71234130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.852450847625732, + "rewards_train/margins": 0.6187834739685059, + "rewards_train/rejected": -6.471234321594238, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -154.4639892578125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -210.63955688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.24639892578125, + "rewards_train/margins": 0.6175568103790283, + "rewards_train/rejected": -2.8639557361602783, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -28.41201400756836, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -76.54914855957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04629860073328018, + "rewards_train/margins": 0.3512134626507759, + "rewards_train/rejected": -0.3049148619174957, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.39818000793457, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -7.700317859649658, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05705700069665909, + "rewards_train/margins": 0.13021378964185715, + "rewards_train/rejected": -0.07315678894519806, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -143.37353515625, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -178.0774383544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5873535871505737, + "rewards_train/margins": 2.5203901529312134, + "rewards_train/rejected": -4.107743740081787, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -38.32473373413086, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -20.288105010986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11752662807703018, + "rewards_train/margins": 0.25883712619543076, + "rewards_train/rejected": -0.14131049811840057, + "step": 353 + }, + { + "epoch": 0.1, + "logps_train/chosen": -61.118709564208984, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -97.96322631835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11312904208898544, + "rewards_train/margins": 0.159451674669981, + "rewards_train/rejected": -0.04632263258099556, + "step": 353 + }, + { + "epoch": 0.1, + "learning_rate": 1.9544707900491468e-06, + "loss": 0.5404, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -87.05064392089844, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -60.15379333496094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.09493561089038849, + "rewards_train/margins": -0.28968505561351776, + "rewards_train/rejected": 0.38462066650390625, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -102.15586853027344, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -103.98790740966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6655868887901306, + "rewards_train/margins": 1.13320392370224, + "rewards_train/rejected": -1.7987908124923706, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.52987289428711, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -25.386554718017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17798729240894318, + "rewards_train/margins": 0.3731682151556015, + "rewards_train/rejected": -0.5511555075645447, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -148.4149169921875, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -159.53280639648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.09149169921875, + "rewards_train/margins": -0.4382110834121704, + "rewards_train/rejected": -1.6532806158065796, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -144.58779907226562, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -196.3563232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8587799072265625, + "rewards_train/margins": 0.8768525123596191, + "rewards_train/rejected": -2.7356324195861816, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -11.710098266601562, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -13.717630386352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5928848385810852, + "rewards_train/margins": 0.07887822389602661, + "rewards_train/rejected": -0.6717630624771118, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -20.471893310546875, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -22.627763748168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02781066857278347, + "rewards_train/margins": 0.34058705531060696, + "rewards_train/rejected": -0.3127763867378235, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -102.68353271484375, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -138.8909454345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.118353247642517, + "rewards_train/margins": 0.22074127197265625, + "rewards_train/rejected": -1.3390945196151733, + "step": 354 + }, + { + "epoch": 0.1, + "logps_train/chosen": -82.65876770019531, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -96.66595458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6158767938613892, + "rewards_train/margins": 1.3007186651229858, + "rewards_train/rejected": -1.916595458984375, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -10.484167098999023, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -23.077329635620117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004666709806770086, + "rewards_train/margins": -0.05943374847993255, + "rewards_train/rejected": 0.05476703867316246, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -135.3590545654297, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -147.6456298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2859054803848267, + "rewards_train/margins": 0.37865757942199707, + "rewards_train/rejected": -1.6645630598068237, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -60.644287109375, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -86.78941345214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36442872881889343, + "rewards_train/margins": 0.5645126402378082, + "rewards_train/rejected": -0.9289413690567017, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -19.769954681396484, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -17.411916732788086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10199546813964844, + "rewards_train/margins": -0.010803796350955963, + "rewards_train/rejected": -0.09119167178869247, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.373453140258789, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -3.883455514907837, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10327968746423721, + "rewards_train/margins": 0.22443774342536926, + "rewards_train/rejected": -0.12115805596113205, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -13.309967041015625, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -9.123208999633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08099670708179474, + "rewards_train/margins": 0.0906991958618164, + "rewards_train/rejected": -0.17169590294361115, + "step": 355 + }, + { + "epoch": 0.1, + "logps_train/chosen": -84.27909851074219, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -100.24993896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3779098689556122, + "rewards_train/margins": 2.147084027528763, + "rewards_train/rejected": -2.524993896484375, + "step": 355 + }, + { + "epoch": 0.1, + "learning_rate": 1.9536782711311805e-06, + "loss": 0.5436, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -79.39854431152344, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -183.53077697753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23985444009304047, + "rewards_train/margins": 4.713223353028297, + "rewards_train/rejected": -4.953077793121338, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.024052381515503, + "logps_train/ref_chosen": -2.1875, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -6.083250045776367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08365523815155029, + "rewards_train/margins": 0.1121697723865509, + "rewards_train/rejected": -0.1958250105381012, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -86.8673095703125, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -90.15690612792969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.58673095703125, + "rewards_train/margins": -0.6210403442382812, + "rewards_train/rejected": -0.9656906127929688, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.971543312072754, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -1.5625, + "logps_train/rejected": -2.8922605514526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09715433418750763, + "rewards_train/margins": 0.03582172095775604, + "rewards_train/rejected": -0.13297605514526367, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -40.18803405761719, + "logps_train/ref_chosen": -40.25, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -31.98870086669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0061965943314135075, + "rewards_train/margins": 0.2800666750408709, + "rewards_train/rejected": -0.2738700807094574, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -22.135007858276367, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -31.5, + "logps_train/rejected": -36.40605926513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38850077986717224, + "rewards_train/margins": 0.10210514068603516, + "rewards_train/rejected": -0.4906059205532074, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.7401567697525024, + "logps_train/ref_chosen": -0.6484375, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -6.9795756340026855, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.009171927347779274, + "rewards_train/margins": -0.12371436692774296, + "rewards_train/rejected": 0.11454243957996368, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -154.78053283691406, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -183.92800903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3780532777309418, + "rewards_train/margins": 2.6147476732730865, + "rewards_train/rejected": -2.9928009510040283, + "step": 356 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.8000175952911377, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -0.51953125, + "logps_train/rejected": -0.5245826244354248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02624824084341526, + "rewards_train/margins": 0.02675337827531621, + "rewards_train/rejected": -0.0005051374319009483, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.115255355834961, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -8.806490898132324, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.050974465906620026, + "rewards_train/margins": 0.2691235616803169, + "rewards_train/rejected": -0.2181490957736969, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -13.992764472961426, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -35.37535858154297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.40072354674339294, + "rewards_train/margins": -0.03674060106277466, + "rewards_train/rejected": 0.4374641478061676, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.0294599533081055, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -4.2343316078186035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07205400615930557, + "rewards_train/margins": 0.11892466619610786, + "rewards_train/rejected": -0.04687066003680229, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.346992552280426, + "logps_train/ref_chosen": -0.4609375, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -3.5209708213806152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.011394495144486427, + "rewards_train/margins": -0.01307092234492302, + "rewards_train/rejected": 0.024465417489409447, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.6037349700927734, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -1.28125, + "logps_train/rejected": -1.4980018138885498, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06427974998950958, + "rewards_train/margins": -0.04260456748306751, + "rewards_train/rejected": -0.02167518250644207, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.6365623474121094, + "logps_train/ref_chosen": -1.8046875, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -11.59903621673584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016812516376376152, + "rewards_train/margins": 0.12046614103019238, + "rewards_train/rejected": -0.10365362465381622, + "step": 357 + }, + { + "epoch": 0.1, + "logps_train/chosen": -65.9233627319336, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -136.3302764892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0923362746834755, + "rewards_train/margins": 1.8406913504004478, + "rewards_train/rejected": -1.9330276250839233, + "step": 357 + }, + { + "epoch": 0.1, + "learning_rate": 1.9528790774764452e-06, + "loss": 0.5749, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -74.61441040039062, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -96.5993423461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26144105195999146, + "rewards_train/margins": 0.6984931826591492, + "rewards_train/rejected": -0.9599342346191406, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -83.92042541503906, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -148.55477905273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24204254150390625, + "rewards_train/margins": 3.3134353160858154, + "rewards_train/rejected": -3.5554778575897217, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -92.33235168457031, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -95.54792785644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06676483154296875, + "rewards_train/margins": 0.6215576529502869, + "rewards_train/rejected": -0.5547928214073181, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -92.7435302734375, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -88.9874496459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.824353039264679, + "rewards_train/margins": 0.47439199686050415, + "rewards_train/rejected": -1.298745036125183, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -131.6938018798828, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -205.94314575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2693801820278168, + "rewards_train/margins": 3.12493434548378, + "rewards_train/rejected": -3.3943145275115967, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -37.55073928833008, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -88.33769226074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.41992607712745667, + "rewards_train/margins": 2.703695446252823, + "rewards_train/rejected": -2.283769369125366, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -10.306111335754395, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -14.903657913208008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0881388708949089, + "rewards_train/margins": 0.27225466817617416, + "rewards_train/rejected": -0.18411579728126526, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -87.30084228515625, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -162.98110961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38008424639701843, + "rewards_train/margins": 2.2180266678333282, + "rewards_train/rejected": -2.5981109142303467, + "step": 358 + }, + { + "epoch": 0.1, + "logps_train/chosen": -35.099853515625, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -33.961971282958984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0150146484375, + "rewards_train/margins": -0.03878822550177574, + "rewards_train/rejected": 0.05380287393927574, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -56.56235122680664, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -23.65084457397461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21876488626003265, + "rewards_train/margins": 0.7588493674993515, + "rewards_train/rejected": -0.5400844812393188, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -80.03025817871094, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -79.13948059082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29697418212890625, + "rewards_train/margins": 0.26092224195599556, + "rewards_train/rejected": 0.03605194017291069, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -108.37468719482422, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -108.92330169677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5874687433242798, + "rewards_train/margins": 0.05486142635345459, + "rewards_train/rejected": -0.6423301696777344, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.752849102020264, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -7.831724166870117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16590991616249084, + "rewards_train/margins": -0.06398750096559525, + "rewards_train/rejected": -0.1019224151968956, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -17.69839096069336, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -9.494379043579102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007339096162468195, + "rewards_train/margins": 0.0295988074503839, + "rewards_train/rejected": -0.0369379036128521, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -113.04997253417969, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -148.74795532226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7049973011016846, + "rewards_train/margins": 1.9197983741760254, + "rewards_train/rejected": -3.62479567527771, + "step": 359 + }, + { + "epoch": 0.1, + "logps_train/chosen": -168.84664916992188, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -176.0459442138672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.384665012359619, + "rewards_train/margins": -0.1800706386566162, + "rewards_train/rejected": -2.204594373703003, + "step": 359 + }, + { + "epoch": 0.1, + "learning_rate": 1.9520732146784488e-06, + "loss": 0.4245, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -178.040283203125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -178.14132690429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.304028511047363, + "rewards_train/margins": 0.010104179382324219, + "rewards_train/rejected": -4.3141326904296875, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.43169838190078735, + "logps_train/ref_chosen": -0.54296875, + "logps_train/ref_rejected": -0.54296875, + "logps_train/rejected": -0.427886962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01112703699618578, + "rewards_train/margins": -0.00038114190101623535, + "rewards_train/rejected": 0.011508178897202015, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.445544481277466, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -5.421008110046387, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04767945036292076, + "rewards_train/margins": 0.00692136213183403, + "rewards_train/rejected": -0.05460081249475479, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -12.308648109436035, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -11.692737579345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.044135190546512604, + "rewards_train/margins": 0.23215895146131516, + "rewards_train/rejected": -0.18802376091480255, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.1865665912628174, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -5.945903301239014, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06415583938360214, + "rewards_train/margins": 0.23374617844820023, + "rewards_train/rejected": -0.16959033906459808, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -87.18865966796875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -106.64957427978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2811340391635895, + "rewards_train/margins": 0.09609146416187286, + "rewards_train/rejected": 0.1850425750017166, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -103.65390014648438, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -37.469566345214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0153900384902954, + "rewards_train/margins": -0.49343341588974, + "rewards_train/rejected": -0.5219566226005554, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.838973999023438, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -24.733959197998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05264740064740181, + "rewards_train/margins": 0.3207485191524029, + "rewards_train/rejected": -0.3733959197998047, + "step": 360 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.420184850692749, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -2.4478859901428223, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025169014930725098, + "rewards_train/margins": 0.007457613945007324, + "rewards_train/rejected": 0.017711400985717773, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -188.38938903808594, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -164.6378173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6389389038085938, + "rewards_train/margins": 0.9248428344726562, + "rewards_train/rejected": -1.56378173828125, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -125.48316955566406, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -136.53900146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3983169496059418, + "rewards_train/margins": 0.35558322072029114, + "rewards_train/rejected": -0.7539001703262329, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -73.3377685546875, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -37.06710433959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31622314453125, + "rewards_train/margins": 0.24793357402086258, + "rewards_train/rejected": 0.06828957051038742, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -158.93869018554688, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -155.58624267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2938690185546875, + "rewards_train/margins": 0.2647552490234375, + "rewards_train/rejected": -0.558624267578125, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.7440324425697327, + "logps_train/ref_chosen": -1.1484375, + "logps_train/ref_rejected": -3.46875, + "logps_train/rejected": -5.421611785888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04044050723314285, + "rewards_train/margins": 0.23572669178247452, + "rewards_train/rejected": -0.19528618454933167, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.8736493587493896, + "logps_train/ref_chosen": -1.421875, + "logps_train/ref_rejected": -1.1875, + "logps_train/rejected": -0.8999603390693665, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.045177437365055084, + "rewards_train/margins": -0.07393140345811844, + "rewards_train/rejected": 0.028753966093063354, + "step": 361 + }, + { + "epoch": 0.1, + "logps_train/chosen": -24.48373031616211, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -82.70185852050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.37662696838378906, + "rewards_train/margins": 1.0968128442764282, + "rewards_train/rejected": -0.7201858758926392, + "step": 361 + }, + { + "epoch": 0.1, + "learning_rate": 1.951260688377377e-06, + "loss": 0.6063, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -114.67776489257812, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -104.07243347167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11777649074792862, + "rewards_train/margins": 0.28946685045957565, + "rewards_train/rejected": -0.4072433412075043, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -43.910518646240234, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -44.31550598144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06605186313390732, + "rewards_train/margins": 0.21549875289201736, + "rewards_train/rejected": -0.2815506160259247, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -7.156811714172363, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -6.743719577789307, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06244383007287979, + "rewards_train/margins": 0.024315785616636276, + "rewards_train/rejected": 0.038128044456243515, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -22.824329376220703, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -21.134689331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2800670564174652, + "rewards_train/margins": 0.18103598803281784, + "rewards_train/rejected": 0.09903106838464737, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -124.85394287109375, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -176.54444885253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.935394287109375, + "rewards_train/margins": 1.6190507411956787, + "rewards_train/rejected": -2.5544450283050537, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -90.89393615722656, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -182.40899658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5893936157226562, + "rewards_train/margins": 2.4515061378479004, + "rewards_train/rejected": -4.040899753570557, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -93.02725982666016, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -82.48725891113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30272600054740906, + "rewards_train/margins": 0.1959998905658722, + "rewards_train/rejected": -0.49872589111328125, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.552762031555176, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -15.745054244995117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.38340121507644653, + "rewards_train/margins": -0.14639578759670258, + "rewards_train/rejected": -0.23700542747974396, + "step": 362 + }, + { + "epoch": 0.1, + "logps_train/chosen": -47.90882110595703, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -60.45393753051758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10911788791418076, + "rewards_train/margins": 0.3295116499066353, + "rewards_train/rejected": -0.22039376199245453, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.015262603759766, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -3.265625, + "logps_train/rejected": -3.8029866218566895, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12340126186609268, + "rewards_train/margins": -0.0696651004254818, + "rewards_train/rejected": -0.053736161440610886, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -41.320343017578125, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -117.82418823242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0195343494415283, + "rewards_train/margins": -0.13711553812026978, + "rewards_train/rejected": -0.8824188113212585, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -1.7387489080429077, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -8.663559913635254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04643760994076729, + "rewards_train/margins": 0.15966860577464104, + "rewards_train/rejected": -0.11323099583387375, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -7.299623489379883, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -3.71875, + "logps_train/rejected": -5.403258323669434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28253766894340515, + "rewards_train/margins": 0.4509885013103485, + "rewards_train/rejected": -0.16845083236694336, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -58.87474822998047, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -148.45651245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1374748945236206, + "rewards_train/margins": 2.858176350593567, + "rewards_train/rejected": -3.9956512451171875, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -54.63027572631836, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -59.95514678955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28697243332862854, + "rewards_train/margins": 0.08248710632324219, + "rewards_train/rejected": 0.20448532700538635, + "step": 363 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.28981876373291, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -5.036600112915039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06476812809705734, + "rewards_train/margins": 0.10905313864350319, + "rewards_train/rejected": -0.04428501054644585, + "step": 363 + }, + { + "epoch": 0.1, + "learning_rate": 1.9504415042600516e-06, + "loss": 0.5355, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.390189170837402, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -10.345781326293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1390189230442047, + "rewards_train/margins": 0.14868420362472534, + "rewards_train/rejected": -0.28770312666893005, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -19.974241256713867, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -18.900470733642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.015075874514877796, + "rewards_train/margins": -0.1948770610615611, + "rewards_train/rejected": 0.2099529355764389, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -91.41803741455078, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -171.5253143310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6418037414550781, + "rewards_train/margins": 2.6107277870178223, + "rewards_train/rejected": -3.2525315284729004, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -194.6854705810547, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -133.74114990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4685471057891846, + "rewards_train/margins": 0.35556793212890625, + "rewards_train/rejected": -1.8241150379180908, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -31.270212173461914, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -11.731812477111816, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20202122628688812, + "rewards_train/margins": -0.11633997410535812, + "rewards_train/rejected": -0.08568125218153, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -135.3679962158203, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -116.81620025634766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1867997646331787, + "rewards_train/margins": -0.15517973899841309, + "rewards_train/rejected": -2.0316200256347656, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -67.84677124023438, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -67.87626647949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21532288193702698, + "rewards_train/margins": 0.0029495209455490112, + "rewards_train/rejected": 0.21237336099147797, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -159.67881774902344, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -132.099365234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2678818702697754, + "rewards_train/margins": -1.4079452753067017, + "rewards_train/rejected": -1.8599365949630737, + "step": 364 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.8977181911468506, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -2.8697330951690674, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06335318088531494, + "rewards_train/margins": 0.15188898891210556, + "rewards_train/rejected": -0.08853580802679062, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.2394402027130127, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -22.178516387939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07863152027130127, + "rewards_train/margins": 0.30172011256217957, + "rewards_train/rejected": -0.38035163283348083, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -7.753308296203613, + "logps_train/ref_chosen": -0.65234375, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -17.062904357910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7100964784622192, + "rewards_train/margins": -0.4538060426712036, + "rewards_train/rejected": -0.2562904357910156, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -19.300012588500977, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -36.28630447387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14499874413013458, + "rewards_train/margins": 0.6486292034387589, + "rewards_train/rejected": -0.5036304593086243, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -159.60403442382812, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -133.20132446289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6604034900665283, + "rewards_train/margins": 1.4597289562225342, + "rewards_train/rejected": -4.1201324462890625, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -20.560087203979492, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -15.285039901733398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09399127960205078, + "rewards_train/margins": 0.009995266795158386, + "rewards_train/rejected": 0.0839960128068924, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -115.9213638305664, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -140.25050354003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.45786362886428833, + "rewards_train/margins": 2.0329139828681946, + "rewards_train/rejected": -1.5750503540039062, + "step": 365 + }, + { + "epoch": 0.1, + "logps_train/chosen": -80.39336395263672, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -118.41742706298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9393364191055298, + "rewards_train/margins": 0.6524063348770142, + "rewards_train/rejected": -1.591742753982544, + "step": 365 + }, + { + "epoch": 0.1, + "learning_rate": 1.9496156680598933e-06, + "loss": 0.6153, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -8.369403839111328, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -14.94146728515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17131538689136505, + "rewards_train/margins": -0.06466865539550781, + "rewards_train/rejected": -0.10664673149585724, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -215.11541748046875, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -171.78134155273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.911541700363159, + "rewards_train/margins": -1.0334075689315796, + "rewards_train/rejected": -1.8781341314315796, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -13.018301963806152, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -12.972295761108398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25808021426200867, + "rewards_train/margins": -0.004600644111633301, + "rewards_train/rejected": -0.25347957015037537, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.70386028289795, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -10.15263843536377, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4578860402107239, + "rewards_train/margins": -0.2613721936941147, + "rewards_train/rejected": -0.1965138465166092, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -177.533203125, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -188.5731201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35332033038139343, + "rewards_train/margins": 0.3039917051792145, + "rewards_train/rejected": -0.6573120355606079, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.990968227386475, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -18.646648406982422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10534682124853134, + "rewards_train/margins": -0.053181979805231094, + "rewards_train/rejected": -0.05216484144330025, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -35.301082611083984, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -35.88634490966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3801082670688629, + "rewards_train/margins": -0.09147375822067261, + "rewards_train/rejected": -0.2886345088481903, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.615161895751953, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -18.54070472717285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05098381265997887, + "rewards_train/margins": 0.23005428537726402, + "rewards_train/rejected": -0.17907047271728516, + "step": 366 + }, + { + "epoch": 0.1, + "logps_train/chosen": -177.72195434570312, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -178.19329833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6721954345703125, + "rewards_train/margins": 1.5471343994140625, + "rewards_train/rejected": -3.219329833984375, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -116.64285278320312, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -140.51409912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7142853140830994, + "rewards_train/margins": 1.7871246933937073, + "rewards_train/rejected": -2.5014100074768066, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.583371162414551, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -8.15531063079834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1333371251821518, + "rewards_train/margins": 0.04781894385814667, + "rewards_train/rejected": -0.18115606904029846, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -19.235652923583984, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -7.010993003845215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16393470764160156, + "rewards_train/margins": 0.19315900839865208, + "rewards_train/rejected": -0.029224300757050514, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.483537673950195, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -12.383975982666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15772877633571625, + "rewards_train/margins": 0.12441883981227875, + "rewards_train/rejected": -0.282147616147995, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -131.98773193359375, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -117.9551773071289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5012268424034119, + "rewards_train/margins": 2.59674471616745, + "rewards_train/rejected": -2.095517873764038, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -19.535938262939453, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -76.68376922607422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3785938322544098, + "rewards_train/margins": -0.6102169156074524, + "rewards_train/rejected": 0.2316230833530426, + "step": 367 + }, + { + "epoch": 0.1, + "logps_train/chosen": -12.802227020263672, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -20.840599060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04897270351648331, + "rewards_train/margins": 0.06008720397949219, + "rewards_train/rejected": -0.1090599074959755, + "step": 367 + }, + { + "epoch": 0.1, + "learning_rate": 1.9487831855568803e-06, + "loss": 0.6388, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -116.11318969726562, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -206.07858276367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6113190054893494, + "rewards_train/margins": 4.4965391755104065, + "rewards_train/rejected": -5.107858180999756, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -64.533447265625, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -95.1692886352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15334473550319672, + "rewards_train/margins": 0.6635841280221939, + "rewards_train/rejected": -0.8169288635253906, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -9.350449562072754, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -13.12259292602539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01004495657980442, + "rewards_train/margins": -0.016535663977265358, + "rewards_train/rejected": 0.0064907073974609375, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -10.823067665100098, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -8.895547866821289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17394323647022247, + "rewards_train/margins": 0.2759980261325836, + "rewards_train/rejected": -0.10205478966236115, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.3649444580078125, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -4.523733615875244, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013505554758012295, + "rewards_train/margins": 0.012753916322253644, + "rewards_train/rejected": 0.0007516384357586503, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -182.9570770263672, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -202.70294189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8957077264785767, + "rewards_train/margins": 2.974586606025696, + "rewards_train/rejected": -3.8702943325042725, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -77.89640808105469, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -80.97002410888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11035919189453125, + "rewards_train/margins": 1.0573616027832031, + "rewards_train/rejected": -0.9470024108886719, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -18.592374801635742, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -27.222434997558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.11576252430677414, + "rewards_train/margins": -0.03699397295713425, + "rewards_train/rejected": 0.1527564972639084, + "step": 368 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.201141119003296, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -0.59765625, + "logps_train/rejected": -0.8017373085021973, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07167661190032959, + "rewards_train/margins": -0.051268504932522774, + "rewards_train/rejected": -0.020408106967806816, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -220.49484252929688, + "logps_train/ref_chosen": -209.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -194.9410858154297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1494842767715454, + "rewards_train/margins": -0.05537569522857666, + "rewards_train/rejected": -1.0941085815429688, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -52.272438049316406, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -94.91436004638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2977561950683594, + "rewards_train/margins": 0.3391922004520893, + "rewards_train/rejected": -0.041436005383729935, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -39.42343521118164, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -40.230804443359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4923435151576996, + "rewards_train/margins": -0.09426307678222656, + "rewards_train/rejected": -0.398080438375473, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.519720077514648, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -13.729310989379883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.048027992248535156, + "rewards_train/margins": 0.18970909714698792, + "rewards_train/rejected": -0.14168110489845276, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -69.53893280029297, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -68.39321899414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.79610675573349, + "rewards_train/margins": 1.7604286670684814, + "rewards_train/rejected": -0.9643219113349915, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -64.2459487915039, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -120.68860626220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39959487318992615, + "rewards_train/margins": 0.6192658245563507, + "rewards_train/rejected": -1.0188606977462769, + "step": 369 + }, + { + "epoch": 0.1, + "logps_train/chosen": -85.48404693603516, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -205.21942138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10159530490636826, + "rewards_train/margins": 5.323537729680538, + "rewards_train/rejected": -5.22194242477417, + "step": 369 + }, + { + "epoch": 0.1, + "learning_rate": 1.9479440625775066e-06, + "loss": 0.4597, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -60.361732482910156, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -125.54711151123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9611732363700867, + "rewards_train/margins": 1.3935380578041077, + "rewards_train/rejected": -2.3547112941741943, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -8.127656936645508, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -14.562541007995605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28464069962501526, + "rewards_train/margins": 0.2653634250164032, + "rewards_train/rejected": -0.5500041246414185, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.723886489868164, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -32.05332565307617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10832615196704865, + "rewards_train/margins": -0.2529935836791992, + "rewards_train/rejected": 0.14466743171215057, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -147.96395874023438, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -146.98397827148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.096395969390869, + "rewards_train/margins": -0.14799809455871582, + "rewards_train/rejected": -1.9483978748321533, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.159631729125977, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -22.72493553161621, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0690881758928299, + "rewards_train/margins": -0.04659462161362171, + "rewards_train/rejected": -0.022493554279208183, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -58.239688873291016, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -62.54273223876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5010311007499695, + "rewards_train/margins": 0.9553043246269226, + "rewards_train/rejected": -0.4542732238769531, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -107.92504119873047, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -110.49722290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1925041228532791, + "rewards_train/margins": 1.7072182148694992, + "rewards_train/rejected": -1.8997223377227783, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -0.9107187986373901, + "logps_train/ref_chosen": -0.890625, + "logps_train/ref_rejected": -0.890625, + "logps_train/rejected": -0.930387020111084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0020093799103051424, + "rewards_train/margins": 0.0019668221939355135, + "rewards_train/rejected": -0.003976202104240656, + "step": 370 + }, + { + "epoch": 0.1, + "logps_train/chosen": -176.5347900390625, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -188.348388671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5534790754318237, + "rewards_train/margins": -0.21864020824432373, + "rewards_train/rejected": -1.3348388671875, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -42.74002456665039, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -62.318817138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2759975492954254, + "rewards_train/margins": 0.1828792616724968, + "rewards_train/rejected": 0.09311828762292862, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.688350200653076, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -3.159790515899658, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015710020437836647, + "rewards_train/margins": 0.011206531897187233, + "rewards_train/rejected": -0.02691655233502388, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -23.028217315673828, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -34.44386291503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015321731567382812, + "rewards_train/margins": 0.6415645480155945, + "rewards_train/rejected": -0.6568862795829773, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -141.70321655273438, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -156.48289489746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.320321798324585, + "rewards_train/margins": 0.32796764373779297, + "rewards_train/rejected": -2.648289442062378, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -10.920692443847656, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -9.169541358947754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0795692428946495, + "rewards_train/margins": -0.018865104764699936, + "rewards_train/rejected": -0.06070413812994957, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -192.80184936523438, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -192.64537048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2801849842071533, + "rewards_train/margins": 3.184352159500122, + "rewards_train/rejected": -6.464537143707275, + "step": 371 + }, + { + "epoch": 0.1, + "logps_train/chosen": -205.21026611328125, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -216.0670623779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7210266590118408, + "rewards_train/margins": 1.285679578781128, + "rewards_train/rejected": -3.0067062377929688, + "step": 371 + }, + { + "epoch": 0.1, + "learning_rate": 1.9470983049947442e-06, + "loss": 0.5215, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -141.233642578125, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -130.30130004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.223364233970642, + "rewards_train/margins": 1.4067658185958862, + "rewards_train/rejected": -2.6301300525665283, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -11.349028587341309, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -1.5625, + "logps_train/rejected": -3.9787232875823975, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16509714722633362, + "rewards_train/margins": 0.40671947598457336, + "rewards_train/rejected": -0.24162232875823975, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -208.30712890625, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -130.1868133544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3307130336761475, + "rewards_train/margins": -0.4620317220687866, + "rewards_train/rejected": -1.8686813116073608, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -140.10690307617188, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -19.18172264099121, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8606903553009033, + "rewards_train/margins": -1.7300180941820145, + "rewards_train/rejected": -0.13067226111888885, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -166.35476684570312, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -161.96463012695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0354766845703125, + "rewards_train/margins": 0.7109863758087158, + "rewards_train/rejected": -3.7464630603790283, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -117.91317749023438, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -170.86349487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.341317892074585, + "rewards_train/margins": 2.4450318813323975, + "rewards_train/rejected": -4.786349773406982, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -3.0269622802734375, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -8.615833282470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003553772112354636, + "rewards_train/margins": 0.15576210035942495, + "rewards_train/rejected": -0.1522083282470703, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -116.13345336914062, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -191.21676635742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06334533542394638, + "rewards_train/margins": 2.358331300318241, + "rewards_train/rejected": -2.4216766357421875, + "step": 372 + }, + { + "epoch": 0.1, + "logps_train/chosen": -52.34367370605469, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -40.04975891113281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23436737060546875, + "rewards_train/margins": -0.004391476511955261, + "rewards_train/rejected": -0.2299758940935135, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -6.375768661499023, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -4.2479248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0438268668949604, + "rewards_train/margins": 0.0403406135737896, + "rewards_train/rejected": -0.08416748046875, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -82.24822235107422, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -94.38006591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2248222380876541, + "rewards_train/margins": 1.7131844013929367, + "rewards_train/rejected": -1.9380066394805908, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -20.95590591430664, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -7.394504547119141, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15809059143066406, + "rewards_train/margins": 0.040734872221946716, + "rewards_train/rejected": -0.19882546365261078, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.710728645324707, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -6.685525417327881, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09294786304235458, + "rewards_train/margins": -0.02439531683921814, + "rewards_train/rejected": -0.06855254620313644, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.197916507720947, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -16.12859344482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016666650772094727, + "rewards_train/margins": 0.3211927115917206, + "rewards_train/rejected": -0.3378593623638153, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -102.734619140625, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -113.00425720214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.023461937904358, + "rewards_train/margins": 1.62696373462677, + "rewards_train/rejected": -2.650425672531128, + "step": 373 + }, + { + "epoch": 0.1, + "logps_train/chosen": -20.83216094970703, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -54.19139862060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09178390353918076, + "rewards_train/margins": 0.9109238013625145, + "rewards_train/rejected": -0.8191398978233337, + "step": 373 + }, + { + "epoch": 0.1, + "learning_rate": 1.946245918727999e-06, + "loss": 0.5469, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.752448558807373, + "logps_train/ref_chosen": -1.6015625, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -27.923561096191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11508860439062119, + "rewards_train/margins": -0.2477324977517128, + "rewards_train/rejected": 0.1326438933610916, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -7.396350860595703, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -7.078718185424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0865100845694542, + "rewards_train/margins": 0.19323674589395523, + "rewards_train/rejected": -0.2797468304634094, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -102.71138000488281, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -102.4765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22113800048828125, + "rewards_train/margins": -0.023481741547584534, + "rewards_train/rejected": -0.19765625894069672, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -207.79254150390625, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -217.25543212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6792542934417725, + "rewards_train/margins": 0.4462890625, + "rewards_train/rejected": -3.1255433559417725, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -2.1651484966278076, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -6.91214656829834, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010264850221574306, + "rewards_train/margins": -0.009675193403381854, + "rewards_train/rejected": -0.0005896568181924522, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -12.302322387695312, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -51.92627716064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3114822506904602, + "rewards_train/margins": 0.05614545941352844, + "rewards_train/rejected": -0.36762771010398865, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -10.132930755615234, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -11.12116527557373, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10079307854175568, + "rewards_train/margins": 0.11132344603538513, + "rewards_train/rejected": -0.2121165245771408, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -138.53326416015625, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -137.0332489013672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.953326463699341, + "rewards_train/margins": -0.3000016212463379, + "rewards_train/rejected": -3.653324842453003, + "step": 374 + }, + { + "epoch": 0.1, + "logps_train/chosen": -65.42064666748047, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -94.87889099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34206467866897583, + "rewards_train/margins": 1.1458244919776917, + "rewards_train/rejected": -1.4878891706466675, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.005488872528076, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -4.998573303222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013048887252807617, + "rewards_train/margins": 0.04305844381451607, + "rewards_train/rejected": -0.056107331067323685, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -5.309898376464844, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -5.089755535125732, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.034635163843631744, + "rewards_train/margins": 0.23736072331666946, + "rewards_train/rejected": -0.20272555947303772, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -146.72064208984375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -156.18521118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.172064185142517, + "rewards_train/margins": 0.946457028388977, + "rewards_train/rejected": -2.118521213531494, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -14.014920234680176, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -36.62204360961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0702420249581337, + "rewards_train/margins": 0.3419623300433159, + "rewards_train/rejected": -0.4122043550014496, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -4.435420989990234, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -7.996275901794434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0029170990455895662, + "rewards_train/margins": 0.24358549411408603, + "rewards_train/rejected": -0.2465025931596756, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -15.407709121704102, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -4.407020568847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2157709151506424, + "rewards_train/margins": -0.24694385938346386, + "rewards_train/rejected": 0.031172944232821465, + "step": 375 + }, + { + "epoch": 0.1, + "logps_train/chosen": -38.22349166870117, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -22.66950225830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2776508331298828, + "rewards_train/margins": 0.6821010708808899, + "rewards_train/rejected": -0.4044502377510071, + "step": 375 + }, + { + "epoch": 0.11, + "learning_rate": 1.9453869097430714e-06, + "loss": 0.6055, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -101.5906753540039, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -123.65487670898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29093247652053833, + "rewards_train/margins": 0.9064201712608337, + "rewards_train/rejected": -0.6154876947402954, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -126.42342376708984, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -131.45773315429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0423424243927002, + "rewards_train/margins": -0.09656912088394165, + "rewards_train/rejected": -0.9457733035087585, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -23.86530303955078, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -9.189963340759277, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011530304327607155, + "rewards_train/margins": -0.23003397323191166, + "rewards_train/rejected": 0.2185036689043045, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -102.55044555664062, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -78.23668670654297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.14495544135570526, + "rewards_train/margins": -0.08137589693069458, + "rewards_train/rejected": 0.22633133828639984, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -129.13095092773438, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -134.49765014648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6130951642990112, + "rewards_train/margins": 0.8366698026657104, + "rewards_train/rejected": -2.4497649669647217, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.842263221740723, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -3.6875, + "logps_train/rejected": -5.761216640472412, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25922632217407227, + "rewards_train/margins": -0.051854655146598816, + "rewards_train/rejected": -0.20737166702747345, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -143.70156860351562, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -176.17877197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.37015700340271, + "rewards_train/margins": 0.9477202892303467, + "rewards_train/rejected": -3.3178772926330566, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -30.441917419433594, + "logps_train/ref_chosen": -30.875, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -30.539443969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043308258056640625, + "rewards_train/margins": 0.009752653539180756, + "rewards_train/rejected": 0.03355560451745987, + "step": 376 + }, + { + "epoch": 0.11, + "logps_train/chosen": -184.79266357421875, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -176.5498809814453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.279266357421875, + "rewards_train/margins": -0.42427825927734375, + "rewards_train/rejected": -0.8549880981445312, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -5.820526599884033, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -31.422365188598633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04142766073346138, + "rewards_train/margins": 0.10080885514616966, + "rewards_train/rejected": -0.14223651587963104, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -14.485198020935059, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -3.265625, + "logps_train/rejected": -4.146940231323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10148020088672638, + "rewards_train/margins": 0.18961172550916672, + "rewards_train/rejected": -0.08813152462244034, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -17.985519409179688, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -4.476277828216553, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07355194538831711, + "rewards_train/margins": -0.057174162939190865, + "rewards_train/rejected": -0.016377782449126244, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -16.507362365722656, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -21.915679931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18676376342773438, + "rewards_train/margins": 0.39083175361156464, + "rewards_train/rejected": -0.20406799018383026, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -168.80050659179688, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -148.59500122070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.280050754547119, + "rewards_train/margins": 0.17944955825805664, + "rewards_train/rejected": -4.459500312805176, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -153.17649841308594, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -149.67938232421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1176499128341675, + "rewards_train/margins": -0.34971165657043457, + "rewards_train/rejected": -0.7679382562637329, + "step": 377 + }, + { + "epoch": 0.11, + "logps_train/chosen": -32.122833251953125, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -1.3515625, + "logps_train/rejected": -7.715855598449707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2622833251953125, + "rewards_train/margins": 0.3741459846496582, + "rewards_train/rejected": -0.6364293098449707, + "step": 377 + }, + { + "epoch": 0.11, + "learning_rate": 1.9445212840521136e-06, + "loss": 0.6347, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -102.19320678710938, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -208.0849151611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8693206906318665, + "rewards_train/margins": 5.739170730113983, + "rewards_train/rejected": -6.60849142074585, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -70.21348571777344, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -177.73646545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1713485717773438, + "rewards_train/margins": 5.102298259735107, + "rewards_train/rejected": -6.273646831512451, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -126.195068359375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -97.59244537353516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8695068359375, + "rewards_train/margins": -0.5102622807025909, + "rewards_train/rejected": -0.35924455523490906, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -70.97150421142578, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -32.954376220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.102849580347538, + "rewards_train/margins": 0.6982872262597084, + "rewards_train/rejected": -0.5954376459121704, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -19.504697799682617, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -12.140332221984863, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24953022599220276, + "rewards_train/margins": 0.538563460111618, + "rewards_train/rejected": -0.2890332341194153, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -77.59062194824219, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -203.1083984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39093780517578125, + "rewards_train/margins": 5.201777935028076, + "rewards_train/rejected": -4.810840129852295, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -123.01559448242188, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -136.3668670654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0515594482421875, + "rewards_train/margins": 1.835127353668213, + "rewards_train/rejected": -2.8866868019104004, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -25.386096954345703, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -7.797199249267578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.31360968947410583, + "rewards_train/margins": -0.2370147630572319, + "rewards_train/rejected": -0.07659492641687393, + "step": 378 + }, + { + "epoch": 0.11, + "logps_train/chosen": -163.80233764648438, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -153.92141723632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.480233907699585, + "rewards_train/margins": -0.08809208869934082, + "rewards_train/rejected": -2.392141819000244, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.7715132236480713, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -11.958268165588379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09277632087469101, + "rewards_train/margins": 0.07805050164461136, + "rewards_train/rejected": -0.17082682251930237, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -71.0571060180664, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -71.3472671508789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1557106077671051, + "rewards_train/margins": 0.029016107320785522, + "rewards_train/rejected": -0.18472671508789062, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -94.98110961914062, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -115.54584503173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34811097383499146, + "rewards_train/margins": 3.4064735770225525, + "rewards_train/rejected": -3.754584550857544, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.78514575958252, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -8.3836669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021485423669219017, + "rewards_train/margins": 0.1223521213978529, + "rewards_train/rejected": -0.10086669772863388, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -73.36156463623047, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -150.11929321289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23615646362304688, + "rewards_train/margins": 4.075772762298584, + "rewards_train/rejected": -4.311929225921631, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.83848762512207, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -1.2890625, + "logps_train/rejected": -3.4415574073791504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05990123748779297, + "rewards_train/margins": 0.27515073120594025, + "rewards_train/rejected": -0.21524949371814728, + "step": 379 + }, + { + "epoch": 0.11, + "logps_train/chosen": -171.2227783203125, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -189.6894073486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.222277879714966, + "rewards_train/margins": 1.5466628074645996, + "rewards_train/rejected": -3.7689406871795654, + "step": 379 + }, + { + "epoch": 0.11, + "learning_rate": 1.9436490477135876e-06, + "loss": 0.3961, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.8958933353424072, + "logps_train/ref_chosen": -1.8515625, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -21.39898681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1044330820441246, + "rewards_train/margins": 0.08546560257673264, + "rewards_train/rejected": -0.18989868462085724, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -73.68531799316406, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -200.49044799804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.181468203663826, + "rewards_train/margins": 6.430513098835945, + "rewards_train/rejected": -6.249044895172119, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.5375592112541199, + "logps_train/ref_chosen": -0.5859375, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -6.370000839233398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004837829153984785, + "rewards_train/margins": 0.05433791456744075, + "rewards_train/rejected": -0.04950008541345596, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -60.79814529418945, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -81.06510925292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1201854720711708, + "rewards_train/margins": 0.12669639755040407, + "rewards_train/rejected": -0.006510925479233265, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -38.648929595947266, + "logps_train/ref_chosen": -30.875, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -37.59065628051758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7773929834365845, + "rewards_train/margins": -0.20582735538482666, + "rewards_train/rejected": -0.5715656280517578, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -250.239990234375, + "logps_train/ref_chosen": -228.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -164.30404663085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2239990234375, + "rewards_train/margins": 0.6064057350158691, + "rewards_train/rejected": -2.830404758453369, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -97.53846740722656, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -99.22163391113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0038467408157885075, + "rewards_train/margins": 0.018316650297492743, + "rewards_train/rejected": -0.02216339111328125, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -48.96232604980469, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -6.9520463943481445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3712325990200043, + "rewards_train/margins": -0.07602795958518982, + "rewards_train/rejected": -0.29520463943481445, + "step": 380 + }, + { + "epoch": 0.11, + "logps_train/chosen": -137.67816162109375, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -162.5084686279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2178162336349487, + "rewards_train/margins": 2.4330307245254517, + "rewards_train/rejected": -3.6508469581604004, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.47155475616455, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -11.1885986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0465945266187191, + "rewards_train/margins": 0.24670439586043358, + "rewards_train/rejected": -0.20010986924171448, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -61.656005859375, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -143.00067138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2656005918979645, + "rewards_train/margins": 1.8344665467739105, + "rewards_train/rejected": -2.100067138671875, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -175.62271118164062, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -80.1578598022461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3622710704803467, + "rewards_train/margins": -0.8964850902557373, + "rewards_train/rejected": -1.4657859802246094, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -15.115224838256836, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -17.350004196166992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0927724838256836, + "rewards_train/margins": 0.3922279477119446, + "rewards_train/rejected": -0.4850004315376282, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.325119972229004, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -9.939291954040527, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1674880087375641, + "rewards_train/margins": 0.1614172039553523, + "rewards_train/rejected": 0.0060708047822117805, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -71.546875, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -217.7433319091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.49531251192092896, + "rewards_train/margins": 1.7696457505226135, + "rewards_train/rejected": -1.2743332386016846, + "step": 381 + }, + { + "epoch": 0.11, + "logps_train/chosen": -13.048562049865723, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -11.047345161437988, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3236062228679657, + "rewards_train/margins": -0.050121694803237915, + "rewards_train/rejected": -0.2734845280647278, + "step": 381 + }, + { + "epoch": 0.11, + "learning_rate": 1.9427702068322226e-06, + "loss": 0.5407, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -43.66472625732422, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -29.39496612548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13352738320827484, + "rewards_train/margins": 0.3230240046977997, + "rewards_train/rejected": -0.18949662148952484, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -10.343608856201172, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -9.9690580368042, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12186088413000107, + "rewards_train/margins": -0.07495507970452309, + "rewards_train/rejected": -0.04690580442547798, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -79.85736846923828, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -148.714599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1357368528842926, + "rewards_train/margins": 2.7357231080532074, + "rewards_train/rejected": -2.8714599609375, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -84.44709014892578, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -26.244983673095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.105290986597538, + "rewards_train/margins": 0.8047893419861794, + "rewards_train/rejected": -0.6994983553886414, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -139.97998046875, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -161.99392700195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.497997999191284, + "rewards_train/margins": 0.0013947486877441406, + "rewards_train/rejected": -3.4993927478790283, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -36.8230094909668, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -12.810213088989258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03230094909667969, + "rewards_train/margins": -0.057529641315340996, + "rewards_train/rejected": 0.02522869221866131, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -17.830833435058594, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -14.209287643432617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1080833449959755, + "rewards_train/margins": -0.18715458363294601, + "rewards_train/rejected": 0.07907123863697052, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -125.72222900390625, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -146.1961669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9722230434417725, + "rewards_train/margins": 2.9473936557769775, + "rewards_train/rejected": -5.91961669921875, + "step": 382 + }, + { + "epoch": 0.11, + "logps_train/chosen": -76.07102966308594, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -59.25, + "logps_train/rejected": -61.28886413574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14289703965187073, + "rewards_train/margins": 0.34678345918655396, + "rewards_train/rejected": -0.20388641953468323, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -199.25291442871094, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -182.58692932128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4252915382385254, + "rewards_train/margins": 0.0334014892578125, + "rewards_train/rejected": -3.458693027496338, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -99.74674224853516, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -90.95683288574219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.42467424273490906, + "rewards_train/margins": -0.07899093627929688, + "rewards_train/rejected": -0.3456833064556122, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.6508852243423462, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -16.7493839263916, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22397398948669434, + "rewards_train/margins": 0.22391238212730968, + "rewards_train/rejected": 6.160735938465223e-05, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -112.33689880371094, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -141.49105834960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7836899161338806, + "rewards_train/margins": 0.2654159665107727, + "rewards_train/rejected": -1.0491058826446533, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -58.84290313720703, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -152.18905639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1842903196811676, + "rewards_train/margins": 4.08461531996727, + "rewards_train/rejected": -4.2689056396484375, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.173220634460449, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -18.17719078063965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023302936926484108, + "rewards_train/margins": 0.20352202095091343, + "rewards_train/rejected": -0.18021908402442932, + "step": 383 + }, + { + "epoch": 0.11, + "logps_train/chosen": -45.021934509277344, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -57.10398483276367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8271934390068054, + "rewards_train/margins": -0.06679493188858032, + "rewards_train/rejected": -0.7603985071182251, + "step": 383 + }, + { + "epoch": 0.11, + "learning_rate": 1.9418847675589735e-06, + "loss": 0.5259, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -4.796225547790527, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -8.594818115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029752446338534355, + "rewards_train/margins": 0.12360925786197186, + "rewards_train/rejected": -0.0938568115234375, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -89.17453002929688, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -174.0269012451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4674530029296875, + "rewards_train/margins": 5.5352373123168945, + "rewards_train/rejected": -6.002690315246582, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.9874935150146484, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -13.630133628845215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11749935150146484, + "rewards_train/margins": 0.02051401138305664, + "rewards_train/rejected": -0.13801336288452148, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -117.99755859375, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -236.128173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.749755859375, + "rewards_train/margins": 3.7630615234375, + "rewards_train/rejected": -4.5128173828125, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -52.26399230957031, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -74.78187561035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02639923058450222, + "rewards_train/margins": -0.3482116814702749, + "rewards_train/rejected": 0.3218124508857727, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -5.862578392028809, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -15.298909187316895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007492160890251398, + "rewards_train/margins": 0.1811330826021731, + "rewards_train/rejected": -0.1736409217119217, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -84.61666870117188, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -93.2035903930664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8616668581962585, + "rewards_train/margins": -0.09130781888961792, + "rewards_train/rejected": -0.7703590393066406, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.718856811523438, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -19.384986877441406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11563568562269211, + "rewards_train/margins": -0.1646369993686676, + "rewards_train/rejected": 0.049001313745975494, + "step": 384 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.258099555969238, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -2.298042058944702, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005440044682472944, + "rewards_train/margins": 0.0805567535571754, + "rewards_train/rejected": -0.07511670887470245, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.592444896697998, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -22.45663070678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1154944896697998, + "rewards_train/margins": 0.0926685780286789, + "rewards_train/rejected": -0.2081630676984787, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -5.081721782684326, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -7.231716632843018, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08245282620191574, + "rewards_train/margins": 0.2306244894862175, + "rewards_train/rejected": -0.14817166328430176, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -152.36807250976562, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -178.59530639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7368072271347046, + "rewards_train/margins": 1.5227235555648804, + "rewards_train/rejected": -3.259530782699585, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -4.316180229187012, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -5.311229705810547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1534930318593979, + "rewards_train/margins": -0.0083075612783432, + "rewards_train/rejected": -0.1451854705810547, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -58.332950592041016, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -208.74227905273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2082950621843338, + "rewards_train/margins": 4.265932843089104, + "rewards_train/rejected": -4.4742279052734375, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -129.4722137451172, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -167.4153289794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7472213506698608, + "rewards_train/margins": 2.5443116426467896, + "rewards_train/rejected": -4.29153299331665, + "step": 385 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.9873530864715576, + "logps_train/ref_chosen": -1.40625, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -4.248681545257568, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0581103079020977, + "rewards_train/margins": 0.07925784960389137, + "rewards_train/rejected": -0.13736815750598907, + "step": 385 + }, + { + "epoch": 0.11, + "learning_rate": 1.9409927360909765e-06, + "loss": 0.4923, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.585421562194824, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -18.50448989868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13979215919971466, + "rewards_train/margins": 0.21065683662891388, + "rewards_train/rejected": -0.35044899582862854, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.7634330987930298, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -0.3671875, + "logps_train/rejected": -0.6580115556716919, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06584419310092926, + "rewards_train/margins": 0.09492659941315651, + "rewards_train/rejected": -0.02908240631222725, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -30.393150329589844, + "logps_train/ref_chosen": -31.625, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -11.741083145141602, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12318497151136398, + "rewards_train/margins": 0.5129182860255241, + "rewards_train/rejected": -0.38973331451416016, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -27.941434860229492, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -5.908257007598877, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3066434860229492, + "rewards_train/margins": -0.2189427837729454, + "rewards_train/rejected": -0.08770070225000381, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -24.69695281982422, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -67.41325378417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6071953177452087, + "rewards_train/margins": 0.00913006067276001, + "rewards_train/rejected": -0.6163253784179688, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -17.225746154785156, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -6.691634178161621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.035074617713689804, + "rewards_train/margins": 0.1528388001024723, + "rewards_train/rejected": -0.1879134178161621, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.2452239990234375, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -5.480627536773682, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01202240027487278, + "rewards_train/margins": 0.08604035340249538, + "rewards_train/rejected": -0.09806275367736816, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.721464157104492, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -23.303918838500977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02839641645550728, + "rewards_train/margins": 0.1519954763352871, + "rewards_train/rejected": -0.18039189279079437, + "step": 386 + }, + { + "epoch": 0.11, + "logps_train/chosen": -12.069741249084473, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -18.021177291870117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3132241368293762, + "rewards_train/margins": -0.24860640615224838, + "rewards_train/rejected": -0.06461773067712784, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -89.28556823730469, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -139.0888671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.27855682373046875, + "rewards_train/margins": -0.2696701046079397, + "rewards_train/rejected": -0.00888671912252903, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -163.6967315673828, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -147.32107543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.669673204421997, + "rewards_train/margins": 1.1124343872070312, + "rewards_train/rejected": -2.7821075916290283, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -99.96279907226562, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -126.95746612548828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7962799072265625, + "rewards_train/margins": -0.2505332827568054, + "rewards_train/rejected": -0.5457466244697571, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.431931495666504, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -8.935853004455566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17555685341358185, + "rewards_train/margins": 0.459767147898674, + "rewards_train/rejected": -0.28421029448509216, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.893223285675049, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -7.5931620597839355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09869732707738876, + "rewards_train/margins": 0.11999388784170151, + "rewards_train/rejected": -0.21869121491909027, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -80.57691955566406, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -126.72631072998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3576919734477997, + "rewards_train/margins": 2.8149390518665314, + "rewards_train/rejected": -3.172631025314331, + "step": 387 + }, + { + "epoch": 0.11, + "logps_train/chosen": -55.210601806640625, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -106.77058410644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17106018960475922, + "rewards_train/margins": 0.7559982091188431, + "rewards_train/rejected": -0.9270583987236023, + "step": 387 + }, + { + "epoch": 0.11, + "learning_rate": 1.940094118671505e-06, + "loss": 0.5894, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.3757880926132202, + "logps_train/ref_chosen": -1.1953125, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -9.446030616760254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01804756000638008, + "rewards_train/margins": 0.1390555016696453, + "rewards_train/rejected": -0.1571030616760254, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -5.135740280151367, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -10.828810691833496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014550971798598766, + "rewards_train/margins": 0.1911820499226451, + "rewards_train/rejected": -0.17663107812404633, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.6154494285583496, + "logps_train/ref_chosen": -0.53515625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -7.127963066101074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008029318414628506, + "rewards_train/margins": 0.15164198819547892, + "rewards_train/rejected": -0.15967130661010742, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -150.57827758789062, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -129.77455139160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0078277587890625, + "rewards_train/margins": 1.3696274757385254, + "rewards_train/rejected": -4.377455234527588, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.818772792816162, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -8.063469886779785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04937272146344185, + "rewards_train/margins": 0.19634470716118813, + "rewards_train/rejected": -0.14697198569774628, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -140.74456787109375, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -149.02769470214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.724456787109375, + "rewards_train/margins": 0.27831268310546875, + "rewards_train/rejected": -2.0027694702148438, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -102.79702758789062, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -147.70947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1797027587890625, + "rewards_train/margins": 0.39124464988708496, + "rewards_train/rejected": -2.5709474086761475, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.755815029144287, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -15.31725788116455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14941850304603577, + "rewards_train/margins": 0.4498943090438843, + "rewards_train/rejected": -0.3004758059978485, + "step": 388 + }, + { + "epoch": 0.11, + "logps_train/chosen": -106.43498229980469, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -103.22865295410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4934982061386108, + "rewards_train/margins": -0.27063286304473877, + "rewards_train/rejected": -1.222865343093872, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -209.51791381835938, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -137.26083374023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.751791477203369, + "rewards_train/margins": -0.2757081985473633, + "rewards_train/rejected": -4.476083278656006, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.9927960634231567, + "logps_train/ref_chosen": -1.9921875, + "logps_train/ref_rejected": -2.765625, + "logps_train/rejected": -5.0439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.085634231567383e-05, + "rewards_train/margins": 0.22777117788791656, + "rewards_train/rejected": -0.22783203423023224, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -123.42446899414062, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -108.57152557373047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8424469232559204, + "rewards_train/margins": -0.13529431819915771, + "rewards_train/rejected": -1.7071526050567627, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -126.1243896484375, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -97.63204956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7624390125274658, + "rewards_train/margins": 0.7007660865783691, + "rewards_train/rejected": -2.463205099105835, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -32.109161376953125, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -29.309499740600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11408386379480362, + "rewards_train/margins": 0.7200338616967201, + "rewards_train/rejected": -0.6059499979019165, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -78.4010238647461, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -79.30654907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29010239243507385, + "rewards_train/margins": 0.3905525505542755, + "rewards_train/rejected": -0.6806549429893494, + "step": 389 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.40390682220459, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -8.852811813354492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.047109317034482956, + "rewards_train/margins": 0.20114050433039665, + "rewards_train/rejected": -0.1540311872959137, + "step": 389 + }, + { + "epoch": 0.11, + "learning_rate": 1.9391889215899296e-06, + "loss": 0.5743, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -75.45399475097656, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -68.34933471679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24539947509765625, + "rewards_train/margins": -0.4104660004377365, + "rewards_train/rejected": 0.16506652534008026, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -98.22348022460938, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -76.8031005859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3223479986190796, + "rewards_train/margins": -1.0420379340648651, + "rewards_train/rejected": -0.2803100645542145, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -24.301883697509766, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -8.992791175842285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15518836677074432, + "rewards_train/margins": 0.11909075081348419, + "rewards_train/rejected": -0.2742791175842285, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -109.43638610839844, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -141.64520263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4436386823654175, + "rewards_train/margins": 0.42088162899017334, + "rewards_train/rejected": -1.8645203113555908, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -132.97906494140625, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -125.88463592529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2479065656661987, + "rewards_train/margins": 0.19055700302124023, + "rewards_train/rejected": -1.438463568687439, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -118.47035217285156, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -97.24043273925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8970352411270142, + "rewards_train/margins": 0.027008056640625, + "rewards_train/rejected": -0.9240432977676392, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.45127010345459, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -8.832752227783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2701270282268524, + "rewards_train/margins": 0.08189821243286133, + "rewards_train/rejected": -0.35202524065971375, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -56.47221374511719, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -71.53358459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6722213625907898, + "rewards_train/margins": 0.6811371445655823, + "rewards_train/rejected": -1.353358507156372, + "step": 390 + }, + { + "epoch": 0.11, + "logps_train/chosen": -13.72454833984375, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -15.229748725891113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11620483547449112, + "rewards_train/margins": 0.06302004307508469, + "rewards_train/rejected": -0.1792248785495758, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -26.944820404052734, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -103.0933837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15698204934597015, + "rewards_train/margins": 0.3023563474416733, + "rewards_train/rejected": -0.45933839678764343, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.269636154174805, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -10.298101425170898, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33790111541748047, + "rewards_train/margins": 0.09503403306007385, + "rewards_train/rejected": -0.4329351484775543, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -113.4242172241211, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -219.77142333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7924216985702515, + "rewards_train/margins": 4.3847209215164185, + "rewards_train/rejected": -6.17714262008667, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -19.28335189819336, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -14.30998706817627, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.015835190191864967, + "rewards_train/margins": -0.034836484119296074, + "rewards_train/rejected": 0.019001293927431107, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -62.926422119140625, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -139.90914916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11764221638441086, + "rewards_train/margins": 2.1732727959752083, + "rewards_train/rejected": -2.290915012359619, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -21.91330909729004, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -19.1485652923584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.058669090270996094, + "rewards_train/margins": 0.4235256314277649, + "rewards_train/rejected": -0.3648565411567688, + "step": 391 + }, + { + "epoch": 0.11, + "logps_train/chosen": -123.23887634277344, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -174.60836791992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7738876342773438, + "rewards_train/margins": 1.2869491577148438, + "rewards_train/rejected": -3.0608367919921875, + "step": 391 + }, + { + "epoch": 0.11, + "learning_rate": 1.93827715118167e-06, + "loss": 0.574, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -10.276115417480469, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -18.469341278076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01511154230684042, + "rewards_train/margins": 0.13182259444147348, + "rewards_train/rejected": -0.1469341367483139, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -124.19439697265625, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -176.46348571777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.269439697265625, + "rewards_train/margins": 2.376908779144287, + "rewards_train/rejected": -4.646348476409912, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -97.39114379882812, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -95.98450469970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03911438211798668, + "rewards_train/margins": 0.8093360997736454, + "rewards_train/rejected": -0.8484504818916321, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.6870148181915283, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -3.1821422576904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08442351967096329, + "rewards_train/margins": 0.05888774432241917, + "rewards_train/rejected": 0.02553577534854412, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -10.158199310302734, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -10.651873588562012, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1154300719499588, + "rewards_train/margins": 0.28061743080616, + "rewards_train/rejected": -0.16518735885620117, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -73.24600219726562, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -118.78192138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12539978325366974, + "rewards_train/margins": 0.8535919338464737, + "rewards_train/rejected": -0.728192150592804, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.6199493408203125, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -9.005759239196777, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1692550629377365, + "rewards_train/margins": 0.4073309898376465, + "rewards_train/rejected": -0.23807592689990997, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -127.13168334960938, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -144.2817840576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2631683349609375, + "rewards_train/margins": 1.265010118484497, + "rewards_train/rejected": -3.5281784534454346, + "step": 392 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.5146768689155579, + "logps_train/ref_chosen": -0.7890625, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -6.376584053039551, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027438564226031303, + "rewards_train/margins": 0.19322196952998638, + "rewards_train/rejected": -0.16578340530395508, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.7228963375091553, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -33.508331298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07146036624908447, + "rewards_train/margins": 0.3847934901714325, + "rewards_train/rejected": -0.313333123922348, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.2001497745513916, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -3.471726894378662, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04092252254486084, + "rewards_train/margins": 0.14122021198272705, + "rewards_train/rejected": -0.10029768943786621, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -77.7411117553711, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -97.87140655517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2241111844778061, + "rewards_train/margins": 0.11302946507930756, + "rewards_train/rejected": -0.33714064955711365, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -91.83372497558594, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -142.46661376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8833725452423096, + "rewards_train/margins": 0.31328892707824707, + "rewards_train/rejected": -2.1966614723205566, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.0593408346176147, + "logps_train/ref_chosen": -1.1484375, + "logps_train/ref_rejected": -1.8125, + "logps_train/rejected": -2.4888956546783447, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008909666910767555, + "rewards_train/margins": 0.07654923386871815, + "rewards_train/rejected": -0.06763956695795059, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -36.75537109375, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -46.5461540222168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0005371093866415322, + "rewards_train/margins": 0.7290782809141092, + "rewards_train/rejected": -0.7296153903007507, + "step": 393 + }, + { + "epoch": 0.11, + "logps_train/chosen": -207.53811645507812, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -144.53213500976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.853811740875244, + "rewards_train/margins": -0.20059823989868164, + "rewards_train/rejected": -2.6532135009765625, + "step": 393 + }, + { + "epoch": 0.11, + "learning_rate": 1.937358813828151e-06, + "loss": 0.5131, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -129.16464233398438, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -129.10398864746094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3664642572402954, + "rewards_train/margins": -0.5060653686523438, + "rewards_train/rejected": -0.8603988885879517, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -124.14846801757812, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -84.119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7148467898368835, + "rewards_train/margins": 0.2970673441886902, + "rewards_train/rejected": -1.0119141340255737, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -83.89735412597656, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -121.54653930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1897354125976562, + "rewards_train/margins": 1.9149186611175537, + "rewards_train/rejected": -3.10465407371521, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.5668208599090576, + "logps_train/ref_chosen": -1.96875, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -13.152809143066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05980708822607994, + "rewards_train/margins": 0.1679738350212574, + "rewards_train/rejected": -0.22778092324733734, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.821276903152466, + "logps_train/ref_chosen": -0.287109375, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -5.316590309143066, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.35341677069664, + "rewards_train/margins": -0.11238273978233337, + "rewards_train/rejected": -0.24103403091430664, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -33.37274169921875, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -13.886442184448242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.137725830078125, + "rewards_train/margins": 0.7076200842857361, + "rewards_train/rejected": -0.5698942542076111, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -18.15971565246582, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -5.558023452758789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.30972155928611755, + "rewards_train/margins": -0.17579421401023865, + "rewards_train/rejected": -0.1339273452758789, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -13.126168251037598, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -4.438421726226807, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08113317936658859, + "rewards_train/margins": 0.16403785347938538, + "rewards_train/rejected": -0.08290467411279678, + "step": 394 + }, + { + "epoch": 0.11, + "logps_train/chosen": -36.750518798828125, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -19.092226028442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17505188286304474, + "rewards_train/margins": -0.015829280018806458, + "rewards_train/rejected": -0.15922260284423828, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.646844863891602, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -4.18376350402832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16656552255153656, + "rewards_train/margins": 0.2615043744444847, + "rewards_train/rejected": -0.09493885189294815, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -90.05030822753906, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -154.89395141601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6449691653251648, + "rewards_train/margins": 3.8843643069267273, + "rewards_train/rejected": -3.2393951416015625, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -169.1132354736328, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -247.92572021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5113235712051392, + "rewards_train/margins": 3.6812485456466675, + "rewards_train/rejected": -5.192572116851807, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -136.22665405273438, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -87.5770492553711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0226653814315796, + "rewards_train/margins": -0.7149604558944702, + "rewards_train/rejected": -0.3077049255371094, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -145.57464599609375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -256.097900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.807464599609375, + "rewards_train/margins": 3.0023255348205566, + "rewards_train/rejected": -5.809790134429932, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -113.5581283569336, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -15.362197875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29418715834617615, + "rewards_train/margins": 0.7929069399833679, + "rewards_train/rejected": -0.4987197816371918, + "step": 395 + }, + { + "epoch": 0.11, + "logps_train/chosen": -107.14990997314453, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -186.6346435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8149909973144531, + "rewards_train/margins": 3.6484732627868652, + "rewards_train/rejected": -4.463464260101318, + "step": 395 + }, + { + "epoch": 0.11, + "learning_rate": 1.936433915956762e-06, + "loss": 0.4823, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -164.08108520507812, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -236.63087463378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.95810866355896, + "rewards_train/margins": 2.004978895187378, + "rewards_train/rejected": -5.963087558746338, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -116.22972869873047, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -164.57833862304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7229728698730469, + "rewards_train/margins": 1.1348609924316406, + "rewards_train/rejected": -2.8578338623046875, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -113.0574951171875, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -137.98204040527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19425049424171448, + "rewards_train/margins": 4.092454582452774, + "rewards_train/rejected": -3.8982040882110596, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.060636520385742, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -15.548009872436523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16268634796142578, + "rewards_train/margins": 0.4612373411655426, + "rewards_train/rejected": -0.2985509932041168, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.41731899976730347, + "logps_train/ref_chosen": -0.6171875, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -12.214606285095215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019986851140856743, + "rewards_train/margins": 0.05394748039543629, + "rewards_train/rejected": -0.033960629254579544, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.264285087585449, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -50.963623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07017850875854492, + "rewards_train/margins": 0.7261838316917419, + "rewards_train/rejected": -0.7963623404502869, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -136.51222229003906, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -151.23727416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6512222290039062, + "rewards_train/margins": 2.1225051879882812, + "rewards_train/rejected": -2.7737274169921875, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -8.738231658935547, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -13.163272857666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06992683559656143, + "rewards_train/margins": 0.061254121363162994, + "rewards_train/rejected": 0.008672714233398438, + "step": 396 + }, + { + "epoch": 0.11, + "logps_train/chosen": -82.0312728881836, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -98.5044937133789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.053127288818359375, + "rewards_train/margins": 0.3473220765590668, + "rewards_train/rejected": -0.40044936537742615, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.593596339225769, + "logps_train/ref_chosen": -1.4921875, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -6.984640121459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01014088373631239, + "rewards_train/margins": 0.13207313138991594, + "rewards_train/rejected": -0.14221401512622833, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -16.151123046875, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -88.0436019897461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29011231660842896, + "rewards_train/margins": -0.08575211465358734, + "rewards_train/rejected": -0.2043602019548416, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.3013277053833, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -1.671875, + "logps_train/rejected": -3.4614181518554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030132770538330078, + "rewards_train/margins": 0.14882154762744904, + "rewards_train/rejected": -0.1789543181657791, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -123.52757263183594, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -149.56866455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4527572393417358, + "rewards_train/margins": 1.9541093111038208, + "rewards_train/rejected": -3.4068665504455566, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -20.828187942504883, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -13.33909797668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.44218119978904724, + "rewards_train/margins": 0.7510910034179688, + "rewards_train/rejected": -0.3089098036289215, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -91.90757751464844, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -130.2174072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.140757754445076, + "rewards_train/margins": 1.7809830158948898, + "rewards_train/rejected": -1.9217407703399658, + "step": 397 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.8163726329803467, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -9.772907257080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03711273893713951, + "rewards_train/margins": 0.17690346762537956, + "rewards_train/rejected": -0.13979072868824005, + "step": 397 + }, + { + "epoch": 0.11, + "learning_rate": 1.9355024640408066e-06, + "loss": 0.4105, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.2821483612060547, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -5.383737564086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20616017282009125, + "rewards_train/margins": 0.42734643816947937, + "rewards_train/rejected": -0.22118626534938812, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -11.117749214172363, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -9.82183837890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08677492290735245, + "rewards_train/margins": -0.08584108500508592, + "rewards_train/rejected": -0.0009338379022665322, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -25.965892791748047, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -8.393205642700195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3090892732143402, + "rewards_train/margins": 0.28491881489753723, + "rewards_train/rejected": -0.5940080881118774, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -13.061962127685547, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -25.067548751831055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3999462127685547, + "rewards_train/margins": 0.7255586385726929, + "rewards_train/rejected": -1.1255048513412476, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -20.286020278930664, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -16.11358070373535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1286020278930664, + "rewards_train/margins": 0.17650604248046875, + "rewards_train/rejected": -0.30510807037353516, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -49.15834426879883, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -72.77597045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09083443135023117, + "rewards_train/margins": 0.3867626264691353, + "rewards_train/rejected": -0.47759705781936646, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -22.579273223876953, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -12.140181541442871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2420726865530014, + "rewards_train/margins": 0.5310908406972885, + "rewards_train/rejected": -0.2890181541442871, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -72.4971923828125, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -118.39046478271484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7497192621231079, + "rewards_train/margins": -0.060672760009765625, + "rewards_train/rejected": -0.6890465021133423, + "step": 398 + }, + { + "epoch": 0.11, + "logps_train/chosen": -4.08210563659668, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -10.13712215423584, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03633556514978409, + "rewards_train/margins": -0.0038733482360839844, + "rewards_train/rejected": -0.032462216913700104, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -81.5132827758789, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -77.29331970214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15132828056812286, + "rewards_train/margins": 0.5280036777257919, + "rewards_train/rejected": -0.6793319582939148, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -84.432861328125, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -109.41475677490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05671386793255806, + "rewards_train/margins": 0.048189545050263405, + "rewards_train/rejected": 0.008524322882294655, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -192.81056213378906, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -199.07318115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.381056308746338, + "rewards_train/margins": 0.02626180648803711, + "rewards_train/rejected": -4.407318115234375, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -22.312345504760742, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -26.138015747070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4187345504760742, + "rewards_train/margins": 0.020067036151885986, + "rewards_train/rejected": -0.4388015866279602, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -17.283815383911133, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -32.54281997680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14661847054958344, + "rewards_train/margins": 0.6509004682302475, + "rewards_train/rejected": -0.5042819976806641, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -196.4708251953125, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -96.06828308105469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0470826625823975, + "rewards_train/margins": -0.8402543067932129, + "rewards_train/rejected": -1.2068283557891846, + "step": 399 + }, + { + "epoch": 0.11, + "logps_train/chosen": -93.74690246582031, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -28.134174346923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02530975453555584, + "rewards_train/margins": 0.5637272130697966, + "rewards_train/rejected": -0.5384174585342407, + "step": 399 + }, + { + "epoch": 0.11, + "learning_rate": 1.934564464599461e-06, + "loss": 0.6112, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -100.88782501220703, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -44.582523345947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.038782503455877304, + "rewards_train/margins": 0.04446982964873314, + "rewards_train/rejected": -0.08325233310461044, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -101.78778839111328, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -151.5151824951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.578778862953186, + "rewards_train/margins": 0.37273937463760376, + "rewards_train/rejected": -0.9515182375907898, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -13.330379486083984, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -6.233535289764404, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07053794711828232, + "rewards_train/margins": 0.35281557589769363, + "rewards_train/rejected": -0.42335352301597595, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -131.16201782226562, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -49.392024993896484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0662018060684204, + "rewards_train/margins": -0.5019993185997009, + "rewards_train/rejected": -0.5642024874687195, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.510471820831299, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -11.394683837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30417218804359436, + "rewards_train/margins": 0.14467120170593262, + "rewards_train/rejected": -0.448843389749527, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -66.42154693603516, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -148.5235595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36715468764305115, + "rewards_train/margins": 1.385201245546341, + "rewards_train/rejected": -1.752355933189392, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -104.17554473876953, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -152.83853149414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.267554521560669, + "rewards_train/margins": 2.2162985801696777, + "rewards_train/rejected": -3.4838531017303467, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -81.33961486816406, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -107.33289337158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.13396155834198, + "rewards_train/margins": 0.34932780265808105, + "rewards_train/rejected": -1.483289361000061, + "step": 400 + }, + { + "epoch": 0.11, + "logps_train/chosen": -70.44184875488281, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -57.60127639770508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3058151304721832, + "rewards_train/margins": 0.040942758321762085, + "rewards_train/rejected": 0.26487237215042114, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -27.210205078125, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -32.419105529785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2960205078125, + "rewards_train/margins": 0.13339003920555115, + "rewards_train/rejected": -0.42941054701805115, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -159.85340881347656, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -173.6346435546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.385340929031372, + "rewards_train/margins": -0.2218766212463379, + "rewards_train/rejected": -2.163464307785034, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.7255609035491943, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -5.835256099700928, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.058693911880254745, + "rewards_train/margins": 0.23909452185034752, + "rewards_train/rejected": -0.18040060997009277, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -163.88162231445312, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -160.89065551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011837768368422985, + "rewards_train/margins": 0.40090333204716444, + "rewards_train/rejected": -0.38906556367874146, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -57.25313949584961, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -64.11552429199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04968605190515518, + "rewards_train/margins": 0.011238481849431992, + "rewards_train/rejected": 0.03844757005572319, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -24.36466407775879, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -67.1864013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.036466408520936966, + "rewards_train/margins": 0.932173740118742, + "rewards_train/rejected": -0.968640148639679, + "step": 401 + }, + { + "epoch": 0.11, + "logps_train/chosen": -24.734901428222656, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -99.82789611816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12349014729261398, + "rewards_train/margins": 3.459299512207508, + "rewards_train/rejected": -3.582789659500122, + "step": 401 + }, + { + "epoch": 0.11, + "learning_rate": 1.933619924197726e-06, + "loss": 0.5264, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -134.23129272460938, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -177.2330322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.376870721578598, + "rewards_train/margins": 0.1001739501953125, + "rewards_train/rejected": 0.2766967713832855, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -89.43583679199219, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -140.67361450195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8435837030410767, + "rewards_train/margins": 4.173777937889099, + "rewards_train/rejected": -5.017361640930176, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.6373040676116943, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -11.476634979248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.022207094356417656, + "rewards_train/margins": 0.20112059824168682, + "rewards_train/rejected": -0.17891350388526917, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.921473503112793, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -14.764505386352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0016026496887207031, + "rewards_train/margins": 0.29680320620536804, + "rewards_train/rejected": -0.29520055651664734, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.572933197021484, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -15.615619659423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0666683241724968, + "rewards_train/margins": 0.501143641769886, + "rewards_train/rejected": -0.5678119659423828, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -117.77255249023438, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -183.59616088867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3272552490234375, + "rewards_train/margins": 4.932361125946045, + "rewards_train/rejected": -7.259616374969482, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -16.678476333618164, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -29.500085830688477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22409763932228088, + "rewards_train/margins": 0.1009109616279602, + "rewards_train/rejected": -0.3250086009502411, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -80.53160095214844, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -80.63027954101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19683991372585297, + "rewards_train/margins": 0.009867861866950989, + "rewards_train/rejected": 0.18697205185890198, + "step": 402 + }, + { + "epoch": 0.11, + "logps_train/chosen": -89.45664978027344, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -58.605751037597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0043350220657885075, + "rewards_train/margins": -0.31008988013491035, + "rewards_train/rejected": 0.31442490220069885, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.1262011528015137, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -3.885314464569092, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09855761379003525, + "rewards_train/margins": -0.13190116733312607, + "rewards_train/rejected": 0.03334355354309082, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -141.25491333007812, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -153.7835235595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2254914045333862, + "rewards_train/margins": 1.452860951423645, + "rewards_train/rejected": -2.6783523559570312, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -131.1034393310547, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -192.38304138183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1103439331054688, + "rewards_train/margins": 1.2279603481292725, + "rewards_train/rejected": -2.338304281234741, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.395792007446289, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -33.80860137939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10832919925451279, + "rewards_train/margins": 0.5850309506058693, + "rewards_train/rejected": -0.6933601498603821, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -23.448780059814453, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -17.09265899658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017621994018554688, + "rewards_train/margins": 0.3018878996372223, + "rewards_train/rejected": -0.2842659056186676, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -40.014041900634766, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -67.57402038574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9014042019844055, + "rewards_train/margins": 1.5559977889060974, + "rewards_train/rejected": -2.457401990890503, + "step": 403 + }, + { + "epoch": 0.11, + "logps_train/chosen": -98.83174133300781, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -153.5302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11682587116956711, + "rewards_train/margins": 3.1198533102869987, + "rewards_train/rejected": -3.0030274391174316, + "step": 403 + }, + { + "epoch": 0.11, + "learning_rate": 1.9326688494463844e-06, + "loss": 0.4339, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -15.649853706359863, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -19.726261138916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09623537212610245, + "rewards_train/margins": 0.026390746235847473, + "rewards_train/rejected": -0.12262611836194992, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -180.20501708984375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -175.91259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.520501613616943, + "rewards_train/margins": 0.620758056640625, + "rewards_train/rejected": -5.141259670257568, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -142.32015991210938, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -141.42477416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.182016134262085, + "rewards_train/margins": 0.91046142578125, + "rewards_train/rejected": -3.092477560043335, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -113.55032348632812, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -121.65190887451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4550323486328125, + "rewards_train/margins": 0.060158610343933105, + "rewards_train/rejected": -1.5151909589767456, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -104.00267028808594, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -99.32896423339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5002670288085938, + "rewards_train/margins": -0.2673705965280533, + "rewards_train/rejected": -0.23289643228054047, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -201.47366333007812, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -199.93182373046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.447366237640381, + "rewards_train/margins": -0.6541838645935059, + "rewards_train/rejected": -3.793182373046875, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -68.60775756835938, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -167.0342254638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11077576130628586, + "rewards_train/margins": 5.7426468804478645, + "rewards_train/rejected": -5.85342264175415, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -126.71939086914062, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -110.70854187011719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6219391226768494, + "rewards_train/margins": -0.051084935665130615, + "rewards_train/rejected": -0.5708541870117188, + "step": 404 + }, + { + "epoch": 0.11, + "logps_train/chosen": -13.010284423828125, + "logps_train/ref_chosen": -0.7890625, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -13.876993179321289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2221221923828125, + "rewards_train/margins": -0.1422353982925415, + "rewards_train/rejected": -1.079886794090271, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -101.93634796142578, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -147.23513793945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.093634843826294, + "rewards_train/margins": 1.029879093170166, + "rewards_train/rejected": -2.12351393699646, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -67.78160095214844, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -90.08045196533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22183990478515625, + "rewards_train/margins": 0.0798850953578949, + "rewards_train/rejected": 0.14195480942726135, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -116.55224609375, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -126.55673217773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7552246451377869, + "rewards_train/margins": 1.1504486203193665, + "rewards_train/rejected": -1.9056732654571533, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -115.16551208496094, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -212.64599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3165512084960938, + "rewards_train/margins": 2.848048686981201, + "rewards_train/rejected": -4.164599895477295, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.6143577098846436, + "logps_train/ref_chosen": -1.78125, + "logps_train/ref_rejected": -0.8203125, + "logps_train/rejected": -0.6792260408401489, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08331077545881271, + "rewards_train/margins": -0.0974194211885333, + "rewards_train/rejected": 0.014108645729720592, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -8.752161026000977, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -6.2887678146362305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2783411145210266, + "rewards_train/margins": -0.01977682113647461, + "rewards_train/rejected": -0.258564293384552, + "step": 405 + }, + { + "epoch": 0.11, + "logps_train/chosen": -20.703840255737305, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -41.106361389160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12038403004407883, + "rewards_train/margins": -0.0347478911280632, + "rewards_train/rejected": -0.08563613891601562, + "step": 405 + }, + { + "epoch": 0.11, + "learning_rate": 1.93171124700195e-06, + "loss": 0.5598, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -106.60706329345703, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -163.5253448486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8607063293457031, + "rewards_train/margins": 2.5918281078338623, + "rewards_train/rejected": -3.4525344371795654, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -31.03968048095703, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -8.501898765563965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09603195637464523, + "rewards_train/margins": 0.5087218508124352, + "rewards_train/rejected": -0.4126898944377899, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.798940658569336, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -8.154526710510254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1388559341430664, + "rewards_train/margins": 0.2293086051940918, + "rewards_train/rejected": -0.09045267105102539, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.5120980739593506, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -12.812759399414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008165192790329456, + "rewards_train/margins": 0.09569113422185183, + "rewards_train/rejected": -0.08752594143152237, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -101.80113983154297, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -109.066650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06988602131605148, + "rewards_train/margins": 0.2765510603785515, + "rewards_train/rejected": -0.2066650390625, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -8.108797073364258, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -7.544778347015381, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12337970733642578, + "rewards_train/margins": 0.024848133325576782, + "rewards_train/rejected": -0.14822784066200256, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -3.579486846923828, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -15.141475677490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0029888153076171875, + "rewards_train/margins": 0.32338640093803406, + "rewards_train/rejected": -0.32039758563041687, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -75.22962188720703, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -80.9137954711914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8729621767997742, + "rewards_train/margins": -0.8315826281905174, + "rewards_train/rejected": -0.041379548609256744, + "step": 406 + }, + { + "epoch": 0.11, + "logps_train/chosen": -107.76536560058594, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -181.32257080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7265366315841675, + "rewards_train/margins": 3.105720639228821, + "rewards_train/rejected": -4.832257270812988, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.9055558443069458, + "logps_train/ref_chosen": -0.69140625, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -2.0453975200653076, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02141495980322361, + "rewards_train/margins": -0.03250020835548639, + "rewards_train/rejected": 0.011085248552262783, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.285342693328857, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -36.549285888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09021573513746262, + "rewards_train/margins": 0.8326443359255791, + "rewards_train/rejected": -0.7424286007881165, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -2.3396124839782715, + "logps_train/ref_chosen": -1.1484375, + "logps_train/ref_rejected": -1.7734375, + "logps_train/rejected": -3.9718053340911865, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11911749839782715, + "rewards_train/margins": 0.10071928799152374, + "rewards_train/rejected": -0.2198367863893509, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -12.994001388549805, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -24.22572898864746, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04315013810992241, + "rewards_train/margins": 1.2356727607548237, + "rewards_train/rejected": -1.278822898864746, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -153.29965209960938, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -131.16036987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4799652099609375, + "rewards_train/margins": 0.6360716819763184, + "rewards_train/rejected": -4.116036891937256, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -4.486312389373779, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -19.190746307373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19863124191761017, + "rewards_train/margins": 0.007943391799926758, + "rewards_train/rejected": -0.20657463371753693, + "step": 407 + }, + { + "epoch": 0.11, + "logps_train/chosen": -46.94108581542969, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -53.02119445800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6191086173057556, + "rewards_train/margins": 0.5830108523368835, + "rewards_train/rejected": -1.2021194696426392, + "step": 407 + }, + { + "epoch": 0.11, + "learning_rate": 1.930747123566625e-06, + "loss": 0.5207, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -81.7017822265625, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -99.6135482788086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5298218131065369, + "rewards_train/margins": 0.39117664098739624, + "rewards_train/rejected": 0.13864517211914062, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -8.030418395996094, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -9.964892387390137, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1811668425798416, + "rewards_train/margins": 0.20282240211963654, + "rewards_train/rejected": -0.38398924469947815, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -135.24453735351562, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -176.48428344726562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4244537353515625, + "rewards_train/margins": -0.3760254383087158, + "rewards_train/rejected": -3.0484282970428467, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -33.57387924194336, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -56.202152252197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3573879301548004, + "rewards_train/margins": 0.8628272712230682, + "rewards_train/rejected": -1.2202152013778687, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -120.23014831542969, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -162.89874267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.023015022277832, + "rewards_train/margins": 0.9168591499328613, + "rewards_train/rejected": -4.939874172210693, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -65.77066040039062, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -67.13204193115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17706604301929474, + "rewards_train/margins": 0.0361381471157074, + "rewards_train/rejected": -0.21320419013500214, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -105.84549713134766, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -178.89117431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2845497131347656, + "rewards_train/margins": 4.504567623138428, + "rewards_train/rejected": -4.789117336273193, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -39.966739654541016, + "logps_train/ref_chosen": -29.875, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -17.686914443969727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0091739892959595, + "rewards_train/margins": -0.27798253297805786, + "rewards_train/rejected": -0.7311914563179016, + "step": 408 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.5449873208999634, + "logps_train/ref_chosen": -0.37109375, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -3.940694570541382, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017389357089996338, + "rewards_train/margins": 0.1126175969839096, + "rewards_train/rejected": -0.13000695407390594, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -192.80899047851562, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -190.7389678955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3808990716934204, + "rewards_train/margins": 0.6929978132247925, + "rewards_train/rejected": -2.073896884918213, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -12.690430641174316, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -11.643107414245605, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3440430760383606, + "rewards_train/margins": -0.04848232865333557, + "rewards_train/rejected": -0.295560747385025, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -7.604707717895508, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -11.228130340576172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0707792267203331, + "rewards_train/margins": -0.04390773922204971, + "rewards_train/rejected": 0.11468696594238281, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -0.7219802141189575, + "logps_train/ref_chosen": -0.890625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -17.061813354492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.016864478588104248, + "rewards_train/margins": -0.17695419490337372, + "rewards_train/rejected": 0.19381867349147797, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -23.839820861816406, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -23.380029678344727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0714820846915245, + "rewards_train/margins": -0.07097911683376878, + "rewards_train/rejected": -0.0005029678577557206, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -5.312899589538574, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -12.175235748291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1875399649143219, + "rewards_train/margins": 0.14873361587524414, + "rewards_train/rejected": -0.33627358078956604, + "step": 409 + }, + { + "epoch": 0.11, + "logps_train/chosen": -149.469482421875, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -24.290430068969727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.546948254108429, + "rewards_train/margins": 0.1195947527885437, + "rewards_train/rejected": -0.6665430068969727, + "step": 409 + }, + { + "epoch": 0.11, + "learning_rate": 1.929776485888251e-06, + "loss": 0.592, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -134.18846130371094, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -147.34764099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.768846273422241, + "rewards_train/margins": 2.6659181118011475, + "rewards_train/rejected": -5.434764385223389, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -102.56260681152344, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -108.71957397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7062606811523438, + "rewards_train/margins": 1.0656967163085938, + "rewards_train/rejected": -1.7719573974609375, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -24.909507751464844, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -22.80812644958496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10345077514648438, + "rewards_train/margins": 0.052361875772476196, + "rewards_train/rejected": -0.15581265091896057, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -156.36061096191406, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -116.53962707519531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7360610961914062, + "rewards_train/margins": -1.5820982456207275, + "rewards_train/rejected": -2.1539628505706787, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -96.07965087890625, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -96.24005126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05796508863568306, + "rewards_train/margins": 0.01604003831744194, + "rewards_train/rejected": -0.074005126953125, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -99.34788513183594, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -133.6400146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1347885131835938, + "rewards_train/margins": 1.2792129516601562, + "rewards_train/rejected": -4.41400146484375, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -118.9527816772461, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -130.64178466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8952781558036804, + "rewards_train/margins": 1.9689002633094788, + "rewards_train/rejected": -2.864178419113159, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.940527439117432, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -27.641868591308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043447256088256836, + "rewards_train/margins": 0.5576341152191162, + "rewards_train/rejected": -0.5141868591308594, + "step": 410 + }, + { + "epoch": 0.11, + "logps_train/chosen": -9.813621520996094, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -1.5078125, + "logps_train/rejected": -8.860892295837402, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7344871759414673, + "rewards_train/margins": 0.0008208155632019043, + "rewards_train/rejected": -0.7353079915046692, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -6.145760536193848, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -3.5013375282287598, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09792394936084747, + "rewards_train/margins": 0.2043077051639557, + "rewards_train/rejected": -0.10638375580310822, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -89.37995147705078, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -125.61453247070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0379951000213623, + "rewards_train/margins": 1.823458194732666, + "rewards_train/rejected": -3.8614532947540283, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -192.67550659179688, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -158.57627868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.967550754547119, + "rewards_train/margins": 0.1900773048400879, + "rewards_train/rejected": -4.157628059387207, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -155.15283203125, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -184.9876708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.365283250808716, + "rewards_train/margins": 1.3334839344024658, + "rewards_train/rejected": -4.698767185211182, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -18.755889892578125, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -27.160655975341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0880889892578125, + "rewards_train/margins": 0.44047659635543823, + "rewards_train/rejected": -0.5285655856132507, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -1.451585054397583, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -1.6328125, + "logps_train/rejected": -2.6369152069091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08609149605035782, + "rewards_train/margins": 0.18650177121162415, + "rewards_train/rejected": -0.10041027516126633, + "step": 411 + }, + { + "epoch": 0.11, + "logps_train/chosen": -67.52418518066406, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -67.3388671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4524185359477997, + "rewards_train/margins": -0.01853179931640625, + "rewards_train/rejected": -0.43388673663139343, + "step": 411 + }, + { + "epoch": 0.12, + "learning_rate": 1.9287993407602635e-06, + "loss": 0.5245, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -6.302057266235352, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -30.213726043701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002080726670101285, + "rewards_train/margins": 1.0817919492255896, + "rewards_train/rejected": -1.083872675895691, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -100.80916595458984, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -125.9139404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7309166193008423, + "rewards_train/margins": 1.3604775667190552, + "rewards_train/rejected": -2.0913941860198975, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -89.99259948730469, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -143.3506622314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9492599368095398, + "rewards_train/margins": 1.9358063340187073, + "rewards_train/rejected": -2.885066270828247, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -74.50891876220703, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -80.269775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24910812079906464, + "rewards_train/margins": 0.4760856628417969, + "rewards_train/rejected": -0.22697754204273224, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.715766906738281, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -10.275592803955078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00907669123262167, + "rewards_train/margins": -0.06901741307228804, + "rewards_train/rejected": 0.05994072183966637, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -12.90201187133789, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -26.331573486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15270118415355682, + "rewards_train/margins": 0.15545617043972015, + "rewards_train/rejected": -0.308157354593277, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -56.81496810913086, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -93.88771057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06850319355726242, + "rewards_train/margins": 0.6572742387652397, + "rewards_train/rejected": -0.5887710452079773, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -116.35589599609375, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -122.87945556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3355896472930908, + "rewards_train/margins": 0.7023558616638184, + "rewards_train/rejected": -2.037945508956909, + "step": 412 + }, + { + "epoch": 0.12, + "logps_train/chosen": -151.79383850097656, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -180.19461059570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02061614952981472, + "rewards_train/margins": 0.04007720947265625, + "rewards_train/rejected": -0.01946105994284153, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -98.54979705810547, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -86.57032775878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1549797058105469, + "rewards_train/margins": 0.052053093910217285, + "rewards_train/rejected": -1.2070327997207642, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -156.63214111328125, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -200.0, + "logps_train/rejected": -229.03646850585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8632141351699829, + "rewards_train/margins": 2.0404328107833862, + "rewards_train/rejected": -2.903646945953369, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.021273136138916, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -1.375, + "logps_train/rejected": -1.5907721519470215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0786898136138916, + "rewards_train/margins": -0.05711259879171848, + "rewards_train/rejected": -0.02157721482217312, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.740835189819336, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -4.195708751678467, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13216648995876312, + "rewards_train/margins": 0.27673736214637756, + "rewards_train/rejected": -0.14457087218761444, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.819549560546875, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -2.375, + "logps_train/rejected": -3.4003429412841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03039245679974556, + "rewards_train/margins": 0.07214183732867241, + "rewards_train/rejected": -0.10253429412841797, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -2.412815570831299, + "logps_train/ref_chosen": -1.4453125, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -2.5720977783203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09675031155347824, + "rewards_train/margins": -0.04266553372144699, + "rewards_train/rejected": -0.05408477783203125, + "step": 413 + }, + { + "epoch": 0.12, + "logps_train/chosen": -209.6593017578125, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -167.74948120117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.165930271148682, + "rewards_train/margins": -0.4909820556640625, + "rewards_train/rejected": -3.674948215484619, + "step": 413 + }, + { + "epoch": 0.12, + "learning_rate": 1.927815695021641e-06, + "loss": 0.5247, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -97.5179672241211, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -183.5243377685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.051796793937683, + "rewards_train/margins": 4.300636887550354, + "rewards_train/rejected": -5.352433681488037, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.835383892059326, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -14.315553665161133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11166339367628098, + "rewards_train/margins": -0.061358027160167694, + "rewards_train/rejected": -0.05030536651611328, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -19.86168670654297, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -21.771923065185547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1611686795949936, + "rewards_train/margins": -0.10897637158632278, + "rewards_train/rejected": -0.05219230800867081, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -126.55156707763672, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -177.33456420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5051566958427429, + "rewards_train/margins": 3.528299629688263, + "rewards_train/rejected": -4.033456325531006, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -149.4228057861328, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -263.3646240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5422806143760681, + "rewards_train/margins": 6.2941818833351135, + "rewards_train/rejected": -6.836462497711182, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -107.64468383789062, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -140.77867126464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1144684553146362, + "rewards_train/margins": 0.2633986473083496, + "rewards_train/rejected": -1.3778671026229858, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -92.48200988769531, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -109.31417846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2982009947299957, + "rewards_train/margins": 1.1832168996334076, + "rewards_train/rejected": -1.4814178943634033, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -50.6391716003418, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -99.26099395751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08891715854406357, + "rewards_train/margins": 0.48718223720788956, + "rewards_train/rejected": -0.5760993957519531, + "step": 414 + }, + { + "epoch": 0.12, + "logps_train/chosen": -51.40087890625, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -82.60049438476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3650878965854645, + "rewards_train/margins": 0.8449616134166718, + "rewards_train/rejected": -1.2100495100021362, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -25.42505645751953, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -44.21308898925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2925056517124176, + "rewards_train/margins": -0.1961967498064041, + "rewards_train/rejected": -0.09630890190601349, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -194.98388671875, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -189.16888427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9983887672424316, + "rewards_train/margins": 1.7184996604919434, + "rewards_train/rejected": -4.716888427734375, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -21.58096694946289, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -44.20509719848633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3580966889858246, + "rewards_train/margins": -0.01258695125579834, + "rewards_train/rejected": -0.34550973773002625, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -132.7778778076172, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -156.94093322753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.727787733078003, + "rewards_train/margins": 3.0663058757781982, + "rewards_train/rejected": -5.794093608856201, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -131.77383422851562, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -119.5311508178711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.077383518218994, + "rewards_train/margins": 0.17573165893554688, + "rewards_train/rejected": -2.253115177154541, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -158.6513671875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -161.449462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.715137004852295, + "rewards_train/margins": 0.5798091888427734, + "rewards_train/rejected": -5.294946193695068, + "step": 415 + }, + { + "epoch": 0.12, + "logps_train/chosen": -123.5380859375, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -211.7605743408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.353808641433716, + "rewards_train/margins": 0.8222489356994629, + "rewards_train/rejected": -3.1760575771331787, + "step": 415 + }, + { + "epoch": 0.12, + "learning_rate": 1.926825555556862e-06, + "loss": 0.3948, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -77.98352813720703, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -79.80327606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20164719223976135, + "rewards_train/margins": 0.28197479993104935, + "rewards_train/rejected": -0.080327607691288, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -7.8893656730651855, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -7.556779861450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.042061567306518555, + "rewards_train/margins": 0.035491421818733215, + "rewards_train/rejected": -0.07755298912525177, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -6.121293544769287, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -7.804834842681885, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03400435671210289, + "rewards_train/margins": 0.14022912457585335, + "rewards_train/rejected": -0.17423348128795624, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.979820251464844, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -20.13958740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2792320251464844, + "rewards_train/margins": 0.10972672700881958, + "rewards_train/rejected": -0.38895875215530396, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -132.06472778320312, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -144.68177795410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8564727306365967, + "rewards_train/margins": 0.5617051124572754, + "rewards_train/rejected": -3.418177843093872, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -171.34152221679688, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -193.36691284179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.934152364730835, + "rewards_train/margins": 2.302539110183716, + "rewards_train/rejected": -5.236691474914551, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -129.8134765625, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -183.79782104492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.831347703933716, + "rewards_train/margins": 3.99843430519104, + "rewards_train/rejected": -6.829782009124756, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -156.3336181640625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -144.8271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.13336181640625, + "rewards_train/margins": 0.19935297966003418, + "rewards_train/rejected": -2.332714796066284, + "step": 416 + }, + { + "epoch": 0.12, + "logps_train/chosen": -170.4681396484375, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -211.0, + "logps_train/rejected": -270.3757019042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.04681396484375, + "rewards_train/margins": 4.890756130218506, + "rewards_train/rejected": -5.937570095062256, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -120.93498992919922, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -137.9385986328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5934990048408508, + "rewards_train/margins": -0.29963913559913635, + "rewards_train/rejected": -0.2938598692417145, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -90.85304260253906, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -81.64985656738281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.014695740304887295, + "rewards_train/margins": -0.020318602211773396, + "rewards_train/rejected": 0.03501434251666069, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -136.83407592773438, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -142.475830078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2334076166152954, + "rewards_train/margins": 2.4141753911972046, + "rewards_train/rejected": -3.6475830078125, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.21316146850586, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -32.61277770996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008816147223114967, + "rewards_train/margins": 0.5024616476148367, + "rewards_train/rejected": -0.5112777948379517, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -159.72021484375, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -194.201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2220215797424316, + "rewards_train/margins": 0.9980955123901367, + "rewards_train/rejected": -4.220117092132568, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.686224102973938, + "logps_train/ref_chosen": -0.4609375, + "logps_train/ref_rejected": -0.4609375, + "logps_train/rejected": -1.6052770614624023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12252866476774216, + "rewards_train/margins": -0.008094705641269684, + "rewards_train/rejected": -0.11443395912647247, + "step": 417 + }, + { + "epoch": 0.12, + "logps_train/chosen": -228.11627197265625, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -244.32208251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.911627292633057, + "rewards_train/margins": 0.5205812454223633, + "rewards_train/rejected": -7.43220853805542, + "step": 417 + }, + { + "epoch": 0.12, + "learning_rate": 1.9258289292958517e-06, + "loss": 0.4542, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -72.0822525024414, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -154.5873260498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4582252502441406, + "rewards_train/margins": 4.90050745010376, + "rewards_train/rejected": -6.3587327003479, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -82.45661163330078, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -118.92033386230469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.045661211013794, + "rewards_train/margins": -0.7536278069019318, + "rewards_train/rejected": -0.2920334041118622, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.888883590698242, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -19.58099365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013888359069824219, + "rewards_train/margins": 0.14421100914478302, + "rewards_train/rejected": -0.15809936821460724, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -129.45819091796875, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -161.810302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.845819115638733, + "rewards_train/margins": 1.1352111101150513, + "rewards_train/rejected": -2.981030225753784, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.1242690086364746, + "logps_train/ref_chosen": -0.734375, + "logps_train/ref_rejected": -1.6953125, + "logps_train/rejected": -1.7038164138793945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03898940235376358, + "rewards_train/margins": -0.038139010954182595, + "rewards_train/rejected": -0.0008503913995809853, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -124.68208312988281, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -178.02212524414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2182083129882812, + "rewards_train/margins": 3.484004497528076, + "rewards_train/rejected": -6.702212810516357, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.352880477905273, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -3.277163505554199, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08033695071935654, + "rewards_train/margins": 0.09711580164730549, + "rewards_train/rejected": -0.01677885092794895, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -122.37104034423828, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -123.33438110351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.887104034423828, + "rewards_train/margins": -0.15366578102111816, + "rewards_train/rejected": -2.73343825340271, + "step": 418 + }, + { + "epoch": 0.12, + "logps_train/chosen": -27.48689842224121, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -61.43726348876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.048689842224121094, + "rewards_train/margins": 1.5450365543365479, + "rewards_train/rejected": -1.593726396560669, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -9.276609420776367, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -9.294804573059082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008910941891372204, + "rewards_train/margins": 0.0018195156008005142, + "rewards_train/rejected": -0.010730457492172718, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -16.585002899169922, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -78.74515533447266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.771000325679779, + "rewards_train/margins": -0.04648476839065552, + "rewards_train/rejected": -0.7245155572891235, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -12.486052513122559, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -22.84327507019043, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11735524982213974, + "rewards_train/margins": -0.07052774354815483, + "rewards_train/rejected": -0.04682750627398491, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -65.50390625, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -57.36623764038086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.775390625, + "rewards_train/margins": -0.03876686096191406, + "rewards_train/rejected": -1.736623764038086, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -80.49996948242188, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -72.6497802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09999694675207138, + "rewards_train/margins": 0.6649811044335365, + "rewards_train/rejected": -0.7649780511856079, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -105.38424682617188, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -107.90573120117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2884247303009033, + "rewards_train/margins": -0.19785165786743164, + "rewards_train/rejected": -2.0905730724334717, + "step": 419 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.982845306396484, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -6.859959602355957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06078452989459038, + "rewards_train/margins": 0.1345864273607731, + "rewards_train/rejected": -0.19537095725536346, + "step": 419 + }, + { + "epoch": 0.12, + "learning_rate": 1.924825823213939e-06, + "loss": 0.568, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -12.756749153137207, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -12.147539138793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1368250846862793, + "rewards_train/margins": 0.4765790104866028, + "rewards_train/rejected": -0.3397539258003235, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.580262184143066, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -23.09150505065918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06697378307580948, + "rewards_train/margins": 0.17612428963184357, + "rewards_train/rejected": -0.10915050655603409, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -89.10458374023438, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -80.35371398925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4104584455490112, + "rewards_train/margins": 1.4749130010604858, + "rewards_train/rejected": -2.885371446609497, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -86.31429290771484, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -154.0210418701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06857071071863174, + "rewards_train/margins": 5.370675183832645, + "rewards_train/rejected": -5.302104473114014, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -28.295063018798828, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -7.919182300567627, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18299369513988495, + "rewards_train/margins": 0.4905369430780411, + "rewards_train/rejected": -0.30754324793815613, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -27.91268539428711, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -27.15991973876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19126854836940765, + "rewards_train/margins": 0.42472346127033234, + "rewards_train/rejected": -0.61599200963974, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -165.27023315429688, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -165.63412475585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.077023506164551, + "rewards_train/margins": 0.4363889694213867, + "rewards_train/rejected": -5.5134124755859375, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -76.81404876708984, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -49.020206451416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43140488862991333, + "rewards_train/margins": 0.1956157684326172, + "rewards_train/rejected": -0.6270206570625305, + "step": 420 + }, + { + "epoch": 0.12, + "logps_train/chosen": -145.07806396484375, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -132.1458740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5078064203262329, + "rewards_train/margins": 3.006780982017517, + "rewards_train/rejected": -3.51458740234375, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -27.028215408325195, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -6.652576923370361, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14717845618724823, + "rewards_train/margins": 0.2936861515045166, + "rewards_train/rejected": -0.14650769531726837, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -184.5019073486328, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -226.11935424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7501907348632812, + "rewards_train/margins": 0.7617447376251221, + "rewards_train/rejected": -2.5119354724884033, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -109.97947692871094, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -103.576904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.197947695851326, + "rewards_train/margins": 1.559742733836174, + "rewards_train/rejected": -1.7576904296875, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -83.07324981689453, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -137.06497192382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1426750272512436, + "rewards_train/margins": 0.5491722375154495, + "rewards_train/rejected": -0.40649721026420593, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -84.6561508178711, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -53.85974884033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06561508029699326, + "rewards_train/margins": 0.8953597918152809, + "rewards_train/rejected": -0.9609748721122742, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.01794719696045, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -1.3828125, + "logps_train/rejected": -1.2936279773712158, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13570527732372284, + "rewards_train/margins": 0.12678682524710894, + "rewards_train/rejected": 0.008918452076613903, + "step": 421 + }, + { + "epoch": 0.12, + "logps_train/chosen": -140.6615447998047, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -115.30619812011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9661545157432556, + "rewards_train/margins": 0.36446529626846313, + "rewards_train/rejected": -1.3306198120117188, + "step": 421 + }, + { + "epoch": 0.12, + "learning_rate": 1.923816244331801e-06, + "loss": 0.4076, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -68.75049591064453, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -136.7066650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.075049638748169, + "rewards_train/margins": 3.295616865158081, + "rewards_train/rejected": -4.37066650390625, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -20.137712478637695, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -9.560888290405273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02627124823629856, + "rewards_train/margins": 0.49856761656701565, + "rewards_train/rejected": -0.5248388648033142, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -93.33061218261719, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -133.6968994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5330612063407898, + "rewards_train/margins": 2.33662873506546, + "rewards_train/rejected": -2.86968994140625, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -100.94013977050781, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -96.06888580322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8940140008926392, + "rewards_train/margins": 0.8628746271133423, + "rewards_train/rejected": -1.7568886280059814, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -156.751708984375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -148.92959594726562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1751708984375, + "rewards_train/margins": -0.2822113037109375, + "rewards_train/rejected": -2.8929595947265625, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -88.58784484863281, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -175.04541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6587845087051392, + "rewards_train/margins": 4.245756506919861, + "rewards_train/rejected": -4.904541015625, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -95.42549896240234, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -131.5411834716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6425499320030212, + "rewards_train/margins": 1.8615685105323792, + "rewards_train/rejected": -2.5041184425354004, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -0.7197068929672241, + "logps_train/ref_chosen": -1.15625, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -4.42484712600708, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04365431144833565, + "rewards_train/margins": 0.23926403000950813, + "rewards_train/rejected": -0.19560971856117249, + "step": 422 + }, + { + "epoch": 0.12, + "logps_train/chosen": -2.1845197677612305, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -0.470703125, + "logps_train/rejected": -0.866439700126648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009673023596405983, + "rewards_train/margins": 0.04924668185412884, + "rewards_train/rejected": -0.039573658257722855, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.559941291809082, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -2.71875, + "logps_train/rejected": -2.8567302227020264, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09193163365125656, + "rewards_train/margins": -0.0781336110085249, + "rewards_train/rejected": -0.013798022642731667, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -18.68598747253418, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -30.455556869506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10609874874353409, + "rewards_train/margins": 0.47695692628622055, + "rewards_train/rejected": -0.5830556750297546, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -25.553451538085938, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -43.757408142089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10715484619140625, + "rewards_train/margins": 0.5578956604003906, + "rewards_train/rejected": -0.4507408142089844, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -22.74410629272461, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -27.235658645629883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6244106292724609, + "rewards_train/margins": 0.21165525913238525, + "rewards_train/rejected": -0.8360658884048462, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -22.207048416137695, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -1.5234375, + "logps_train/rejected": -3.4077539443969727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24570484459400177, + "rewards_train/margins": -0.05727319419384003, + "rewards_train/rejected": -0.18843165040016174, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -6.669079780578613, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -6.456460952758789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20128297805786133, + "rewards_train/margins": -0.10876188427209854, + "rewards_train/rejected": -0.09252109378576279, + "step": 423 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.6412029266357422, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -7.828526496887207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.010214042849838734, + "rewards_train/margins": -0.09611139167100191, + "rewards_train/rejected": 0.08589734882116318, + "step": 423 + }, + { + "epoch": 0.12, + "learning_rate": 1.922800199715421e-06, + "loss": 0.4801, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -122.34318542480469, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -122.18492126464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6343185305595398, + "rewards_train/margins": 0.48417359590530396, + "rewards_train/rejected": -1.1184921264648438, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -129.5221405029297, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -173.7712860107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24778595566749573, + "rewards_train/margins": 3.724914699792862, + "rewards_train/rejected": -3.477128744125366, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -120.8076171875, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -143.89541625976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.130761742591858, + "rewards_train/margins": 0.7587798833847046, + "rewards_train/rejected": -1.8895416259765625, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -10.699609756469727, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -9.969773292541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21128903329372406, + "rewards_train/margins": 0.5145163685083389, + "rewards_train/rejected": -0.30322733521461487, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.709482192993164, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -19.940275192260742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008448219858109951, + "rewards_train/margins": 0.4418293172493577, + "rewards_train/rejected": -0.45027753710746765, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.69478178024292, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -2.125, + "logps_train/rejected": -2.323817729949951, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03989682346582413, + "rewards_train/margins": 0.059778597205877304, + "rewards_train/rejected": -0.019881773740053177, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.269472122192383, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -13.173847198486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01680278778076172, + "rewards_train/margins": 0.14668750762939453, + "rewards_train/rejected": -0.1298847198486328, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.926727294921875, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -8.415488243103027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11045227199792862, + "rewards_train/margins": 0.1895010992884636, + "rewards_train/rejected": -0.07904882729053497, + "step": 424 + }, + { + "epoch": 0.12, + "logps_train/chosen": -64.90799713134766, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -88.53858184814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7907997369766235, + "rewards_train/margins": 0.6130584478378296, + "rewards_train/rejected": -1.4038581848144531, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.482555866241455, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -0.2578125, + "logps_train/rejected": -0.1713695079088211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04825558885931969, + "rewards_train/margins": -0.056899888440966606, + "rewards_train/rejected": 0.00864429958164692, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -149.9917449951172, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -168.891357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0991744995117188, + "rewards_train/margins": 0.8899612426757812, + "rewards_train/rejected": -2.9891357421875, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -53.9932861328125, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -75.29341125488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22432862222194672, + "rewards_train/margins": 1.1800124794244766, + "rewards_train/rejected": -1.4043411016464233, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -29.812604904174805, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -35.03840255737305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34376049041748047, + "rewards_train/margins": 0.5600797533988953, + "rewards_train/rejected": -0.9038402438163757, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -124.0221939086914, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -124.12596130371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09778060764074326, + "rewards_train/margins": 0.010376736521720886, + "rewards_train/rejected": 0.08740387111902237, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.020871639251709, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -16.760692596435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08333716541528702, + "rewards_train/margins": 0.08648210018873215, + "rewards_train/rejected": -0.16981926560401917, + "step": 425 + }, + { + "epoch": 0.12, + "logps_train/chosen": -7.780781269073486, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -13.513511657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29057812690734863, + "rewards_train/margins": 0.12952303886413574, + "rewards_train/rejected": -0.4201011657714844, + "step": 425 + }, + { + "epoch": 0.12, + "learning_rate": 1.9217776964760333e-06, + "loss": 0.4956, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -18.729551315307617, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -8.277751922607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33545514941215515, + "rewards_train/margins": 0.04544505476951599, + "rewards_train/rejected": -0.38090020418167114, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -27.00760841369629, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -35.58537673950195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1117391586303711, + "rewards_train/margins": 0.3202768415212631, + "rewards_train/rejected": -0.20853768289089203, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.231992721557617, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -7.695858478546143, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014300728216767311, + "rewards_train/margins": 0.19638658501207829, + "rewards_train/rejected": -0.18208585679531097, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -94.577880859375, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -82.28964233398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04221191629767418, + "rewards_train/margins": 1.3211761973798275, + "rewards_train/rejected": -1.2789642810821533, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.623371601104736, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -5.15204381942749, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06546216458082199, + "rewards_train/margins": 0.009117215871810913, + "rewards_train/rejected": -0.0745793804526329, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.0080595016479492, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -12.508100509643555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.003149700118228793, + "rewards_train/margins": -0.12733965064398944, + "rewards_train/rejected": 0.12418995052576065, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -11.099730491638184, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -43.65608596801758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05997304990887642, + "rewards_train/margins": 0.7056355588138103, + "rewards_train/rejected": -0.7656086087226868, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -35.2160758972168, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -59.0, + "logps_train/rejected": -64.19474792480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07160758972167969, + "rewards_train/margins": 0.447867214679718, + "rewards_train/rejected": -0.5194748044013977, + "step": 426 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.5884993076324463, + "logps_train/ref_chosen": -1.7890625, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -52.901432037353516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17994368076324463, + "rewards_train/margins": -0.039800480008125305, + "rewards_train/rejected": -0.14014320075511932, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -10.507028579711914, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -5.99849796295166, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.038202859461307526, + "rewards_train/margins": -0.06960306316614151, + "rewards_train/rejected": 0.031400203704833984, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.95595932006836, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -15.183403015136719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22684593498706818, + "rewards_train/margins": -0.0022556334733963013, + "rewards_train/rejected": -0.22459030151367188, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -138.62368774414062, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -158.60476684570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.262368768453598, + "rewards_train/margins": 2.9481078684329987, + "rewards_train/rejected": -3.2104766368865967, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -103.00130462646484, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -128.81832885742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6501304507255554, + "rewards_train/margins": 0.9817025065422058, + "rewards_train/rejected": -1.6318329572677612, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -237.2689208984375, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -253.54644775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.226892471313477, + "rewards_train/margins": 0.4277524948120117, + "rewards_train/rejected": -8.654644966125488, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -90.47917175292969, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -128.17919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.097917318344116, + "rewards_train/margins": 0.07000255584716797, + "rewards_train/rejected": -2.167919874191284, + "step": 427 + }, + { + "epoch": 0.12, + "logps_train/chosen": -10.119283676147461, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -11.861777305603027, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.49630337953567505, + "rewards_train/margins": -0.01012563705444336, + "rewards_train/rejected": -0.4861777424812317, + "step": 427 + }, + { + "epoch": 0.12, + "learning_rate": 1.920748741770077e-06, + "loss": 0.5476, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -132.4593505859375, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -144.3045654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.545935034751892, + "rewards_train/margins": 0.3345215320587158, + "rewards_train/rejected": -1.880456566810608, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -77.26156616210938, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -121.05642700195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12384338676929474, + "rewards_train/margins": 0.8794860988855362, + "rewards_train/rejected": -0.7556427121162415, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -11.33267593383789, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -10.518192291259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11451759189367294, + "rewards_train/margins": 0.22480163723230362, + "rewards_train/rejected": -0.33931922912597656, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -11.762470245361328, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -10.149519920349121, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.37624701857566833, + "rewards_train/margins": -0.305045023560524, + "rewards_train/rejected": -0.07120199501514435, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.6696648597717285, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -8.316226959228516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1904039829969406, + "rewards_train/margins": -0.32753129303455353, + "rewards_train/rejected": 0.13712731003761292, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -81.57826232910156, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -22.035888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1921737641096115, + "rewards_train/margins": 0.2707626298069954, + "rewards_train/rejected": -0.07858886569738388, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -55.63591384887695, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -95.26524353027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3114086091518402, + "rewards_train/margins": 1.287932962179184, + "rewards_train/rejected": -0.9765243530273438, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -169.96649169921875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -200.0, + "logps_train/rejected": -239.3314971923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.896649122238159, + "rewards_train/margins": 0.03650069236755371, + "rewards_train/rejected": -3.933149814605713, + "step": 428 + }, + { + "epoch": 0.12, + "logps_train/chosen": -113.94989776611328, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -124.48239135742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.744989812374115, + "rewards_train/margins": -0.14675068855285645, + "rewards_train/rejected": -0.5982391238212585, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.085710525512695, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -4.419365406036377, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07263355702161789, + "rewards_train/margins": -0.036947015672922134, + "rewards_train/rejected": -0.035686541348695755, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -70.52397918701172, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -72.81990051269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6773979067802429, + "rewards_train/margins": 0.47959214448928833, + "rewards_train/rejected": -1.1569900512695312, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -76.07899475097656, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -19.72258758544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30789947509765625, + "rewards_train/margins": 0.7956093549728394, + "rewards_train/rejected": -1.1035088300704956, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -58.86322784423828, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -86.13786315917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.811322808265686, + "rewards_train/margins": 0.8524634838104248, + "rewards_train/rejected": -1.6637862920761108, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -25.55007553100586, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -16.606292724609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08249244838953018, + "rewards_train/margins": -0.18187829107046127, + "rewards_train/rejected": 0.26437073945999146, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.928534030914307, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -0.90234375, + "logps_train/rejected": -1.4421104192733765, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12277159839868546, + "rewards_train/margins": 0.17674826458096504, + "rewards_train/rejected": -0.05397666618227959, + "step": 429 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.372790336608887, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -12.844054222106934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05334596708416939, + "rewards_train/margins": 0.5565013773739338, + "rewards_train/rejected": -0.5031554102897644, + "step": 429 + }, + { + "epoch": 0.12, + "learning_rate": 1.9197133427991433e-06, + "loss": 0.5764, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -2.9635467529296875, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -25.334829330444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08020782470703125, + "rewards_train/margins": 1.1199407577514648, + "rewards_train/rejected": -1.0397329330444336, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -75.77049255371094, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -104.0513916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3770492672920227, + "rewards_train/margins": 0.6780899167060852, + "rewards_train/rejected": -1.055139183998108, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -99.62141418457031, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -139.3019256591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9621414542198181, + "rewards_train/margins": 1.4180510640144348, + "rewards_train/rejected": -2.380192518234253, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -47.046966552734375, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -15.01298713684082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.354696661233902, + "rewards_train/margins": 0.36847707629203796, + "rewards_train/rejected": -0.7231737375259399, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.10291862487793, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -12.541446685791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1272081434726715, + "rewards_train/margins": 0.28760281205177307, + "rewards_train/rejected": -0.16039466857910156, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -11.144645690917969, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -18.52656364440918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17696456611156464, + "rewards_train/margins": 0.0006918013095855713, + "rewards_train/rejected": -0.1776563674211502, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -47.683815002441406, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -47.5551872253418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.3316185176372528, + "rewards_train/margins": -0.01286277174949646, + "rewards_train/rejected": 0.34448128938674927, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -141.71255493164062, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -145.00442504882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7287445068359375, + "rewards_train/margins": 3.179187059402466, + "rewards_train/rejected": -2.4504425525665283, + "step": 430 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.4628121852874756, + "logps_train/ref_chosen": -1.3515625, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -5.75652551651001, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011124968528747559, + "rewards_train/margins": 0.2785900831222534, + "rewards_train/rejected": -0.289715051651001, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -101.17242431640625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -125.50227355957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.367242455482483, + "rewards_train/margins": 1.4829849004745483, + "rewards_train/rejected": -2.8502273559570312, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -39.005428314208984, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -63.03534698486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024457169696688652, + "rewards_train/margins": 1.0029918681830168, + "rewards_train/rejected": -0.9785346984863281, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -87.00712585449219, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -87.43448638916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0007125854608602822, + "rewards_train/margins": 0.04273605271009728, + "rewards_train/rejected": -0.043448638170957565, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -7.033695220947266, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -5.575146675109863, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05336952209472656, + "rewards_train/margins": 0.029145143926143646, + "rewards_train/rejected": -0.08251466602087021, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -2.0427470207214355, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -0.51953125, + "logps_train/rejected": -0.4865354299545288, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.055837202817201614, + "rewards_train/margins": -0.05913678486831486, + "rewards_train/rejected": 0.003299582051113248, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -157.17337036132812, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -89.77008819580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0173370838165283, + "rewards_train/margins": 0.2096717357635498, + "rewards_train/rejected": -2.227008819580078, + "step": 431 + }, + { + "epoch": 0.12, + "logps_train/chosen": -20.2933292388916, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -43.28868865966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5168329477310181, + "rewards_train/margins": 0.6120359897613525, + "rewards_train/rejected": -1.1288689374923706, + "step": 431 + }, + { + "epoch": 0.12, + "learning_rate": 1.918671506809928e-06, + "loss": 0.4756, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -81.03394317626953, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -54.769744873046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.653394341468811, + "rewards_train/margins": -0.3514198362827301, + "rewards_train/rejected": -0.30197450518608093, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -193.37216186523438, + "logps_train/ref_chosen": -203.0, + "logps_train/ref_rejected": -264.0, + "logps_train/rejected": -279.90899658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.9627838134765625, + "rewards_train/margins": 2.5536835193634033, + "rewards_train/rejected": -1.5908997058868408, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -128.74136352539062, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -194.82522583007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4741363525390625, + "rewards_train/margins": 1.3083863258361816, + "rewards_train/rejected": -3.782522678375244, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -236.60194396972656, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -219.80462646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.160194396972656, + "rewards_train/margins": 2.5702686309814453, + "rewards_train/rejected": -9.730463027954102, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.7846148014068604, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -8.337970733642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10346148163080215, + "rewards_train/margins": 0.03658559173345566, + "rewards_train/rejected": -0.1400470733642578, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.584257125854492, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -7.375975131988525, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04157428815960884, + "rewards_train/margins": 0.37917180731892586, + "rewards_train/rejected": -0.337597519159317, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -209.23963928222656, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -141.3647003173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0239639282226562, + "rewards_train/margins": 0.8125060796737671, + "rewards_train/rejected": -1.8364700078964233, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -169.53341674804688, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -169.29196166992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4533417224884033, + "rewards_train/margins": 1.2758545875549316, + "rewards_train/rejected": -2.729196310043335, + "step": 432 + }, + { + "epoch": 0.12, + "logps_train/chosen": -79.04839324951172, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -130.08731079101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3548393249511719, + "rewards_train/margins": 1.7538917064666748, + "rewards_train/rejected": -3.1087310314178467, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -78.48947143554688, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -136.79074096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5010528564453125, + "rewards_train/margins": 1.9301270246505737, + "rewards_train/rejected": -1.4290741682052612, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -75.26209259033203, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -141.8677520751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5762092471122742, + "rewards_train/margins": 1.860565960407257, + "rewards_train/rejected": -2.4367752075195312, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -80.21861267089844, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -150.4341278076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2718612849712372, + "rewards_train/margins": 3.6715514957904816, + "rewards_train/rejected": -3.9434127807617188, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -73.38192749023438, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -59.746124267578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9381927847862244, + "rewards_train/margins": -0.11358034610748291, + "rewards_train/rejected": -0.8246124386787415, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -56.18637466430664, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -55.2347412109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24363747239112854, + "rewards_train/margins": -0.07016335427761078, + "rewards_train/rejected": -0.17347411811351776, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -89.48979187011719, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -121.73682403564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6489791870117188, + "rewards_train/margins": -0.2252967655658722, + "rewards_train/rejected": -0.42368242144584656, + "step": 433 + }, + { + "epoch": 0.12, + "logps_train/chosen": -22.58806800842285, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -12.464853286743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19119320809841156, + "rewards_train/margins": 0.27517853677272797, + "rewards_train/rejected": -0.0839853286743164, + "step": 433 + }, + { + "epoch": 0.12, + "learning_rate": 1.9176232410941778e-06, + "loss": 0.4003, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -115.3609619140625, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -114.27204895019531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.886096179485321, + "rewards_train/margins": -0.008891284465789795, + "rewards_train/rejected": -0.8772048950195312, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.092245578765869, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -4.088925361633301, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020462943241000175, + "rewards_train/margins": 0.04966798052191734, + "rewards_train/rejected": -0.029205037280917168, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -6.086211204528809, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -10.585880279541016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3164336383342743, + "rewards_train/margins": -0.0578455924987793, + "rewards_train/rejected": -0.258588045835495, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -91.1675033569336, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -90.81988525390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7167503237724304, + "rewards_train/margins": -0.034761786460876465, + "rewards_train/rejected": -0.681988537311554, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -34.08885955810547, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -9.972969055175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22138595581054688, + "rewards_train/margins": 0.37747347354888916, + "rewards_train/rejected": -0.598859429359436, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -15.106882095336914, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -56.37921142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2018117904663086, + "rewards_train/margins": 1.7397329807281494, + "rewards_train/rejected": -1.5379211902618408, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -111.62396240234375, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -132.6400146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.962396264076233, + "rewards_train/margins": 1.0016051530838013, + "rewards_train/rejected": -2.964001417160034, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -0.23431500792503357, + "logps_train/ref_chosen": -0.478515625, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -3.6470417976379395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024420062080025673, + "rewards_train/margins": 0.08599924109876156, + "rewards_train/rejected": -0.061579179018735886, + "step": 434 + }, + { + "epoch": 0.12, + "logps_train/chosen": -0.7217644453048706, + "logps_train/ref_chosen": -1.0703125, + "logps_train/ref_rejected": -1.46875, + "logps_train/rejected": -2.6821556091308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03485480695962906, + "rewards_train/margins": 0.15619537234306335, + "rewards_train/rejected": -0.1213405653834343, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -0.6716922521591187, + "logps_train/ref_chosen": -0.5078125, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -4.958442211151123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016387974843382835, + "rewards_train/margins": -0.05804375372827053, + "rewards_train/rejected": 0.041655778884887695, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -186.204345703125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -208.100341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.920434474945068, + "rewards_train/margins": 1.589599609375, + "rewards_train/rejected": -6.510034084320068, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -110.76588439941406, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -129.92373657226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7765884399414062, + "rewards_train/margins": 1.465785264968872, + "rewards_train/rejected": -3.2423737049102783, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -137.078857421875, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -187.9947509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.257885694503784, + "rewards_train/margins": 3.141589403152466, + "rewards_train/rejected": -5.39947509765625, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -20.392515182495117, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -53.59325408935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22675152122974396, + "rewards_train/margins": 0.20757390558719635, + "rewards_train/rejected": -0.4343254268169403, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.4095444679260254, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -8.927469253540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09564194828271866, + "rewards_train/margins": 0.20335497707128525, + "rewards_train/rejected": -0.2989969253540039, + "step": 435 + }, + { + "epoch": 0.12, + "logps_train/chosen": -143.493408203125, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -144.60662841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6993408203125, + "rewards_train/margins": 2.1113219261169434, + "rewards_train/rejected": -5.810662746429443, + "step": 435 + }, + { + "epoch": 0.12, + "learning_rate": 1.916568552988642e-06, + "loss": 0.4706, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.137186527252197, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -6.375399589538574, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04184365272521973, + "rewards_train/margins": 0.005071308463811874, + "rewards_train/rejected": -0.0469149611890316, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.939291000366211, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -10.791135787963867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17205409705638885, + "rewards_train/margins": 0.18518449366092682, + "rewards_train/rejected": -0.3572385907173157, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -167.65084838867188, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -153.23379516601562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.765084743499756, + "rewards_train/margins": -1.9917051792144775, + "rewards_train/rejected": -2.7733795642852783, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -4.927883148193359, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -12.57790756225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14158669114112854, + "rewards_train/margins": 0.18062744662165642, + "rewards_train/rejected": -0.03904075548052788, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -20.119539260864258, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -26.198200225830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3244539201259613, + "rewards_train/margins": 0.170366108417511, + "rewards_train/rejected": -0.4948200285434723, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -69.33171844482422, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -78.8803482055664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2831718623638153, + "rewards_train/margins": -0.6451370418071747, + "rewards_train/rejected": 0.3619651794433594, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.0127363204956055, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -9.020036697387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11064863204956055, + "rewards_train/margins": 0.360105037689209, + "rewards_train/rejected": -0.47075366973876953, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -157.97979736328125, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -190.08575439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.147979736328125, + "rewards_train/margins": 2.860595703125, + "rewards_train/rejected": -6.008575439453125, + "step": 436 + }, + { + "epoch": 0.12, + "logps_train/chosen": -97.73442840576172, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -113.22219848632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9234428405761719, + "rewards_train/margins": 0.14877700805664062, + "rewards_train/rejected": -2.0722198486328125, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -109.70230102539062, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -102.53777313232422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7702301144599915, + "rewards_train/margins": -0.5164527893066406, + "rewards_train/rejected": -0.25377732515335083, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -6.0684661865234375, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -21.738908767700195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1380966156721115, + "rewards_train/margins": -0.11420573852956295, + "rewards_train/rejected": -0.02389087714254856, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -19.080089569091797, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -5.358232021331787, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03300895914435387, + "rewards_train/margins": 0.2637517489492893, + "rewards_train/rejected": -0.2967607080936432, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -16.233827590942383, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -11.802873611450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014117240905761719, + "rewards_train/margins": 0.6319046020507812, + "rewards_train/rejected": -0.6177873611450195, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -113.04977416992188, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -207.55368041992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.454977422952652, + "rewards_train/margins": 4.7003909051418304, + "rewards_train/rejected": -5.155368328094482, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -116.66019439697266, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -126.57269287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1839805692434311, + "rewards_train/margins": 0.3912498652935028, + "rewards_train/rejected": -0.20726929605007172, + "step": 437 + }, + { + "epoch": 0.12, + "logps_train/chosen": -80.22742462158203, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -74.9342269897461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2772575318813324, + "rewards_train/margins": 1.0706802308559418, + "rewards_train/rejected": -0.7934226989746094, + "step": 437 + }, + { + "epoch": 0.12, + "learning_rate": 1.9155074498750184e-06, + "loss": 0.6535, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -23.05931282043457, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -17.90729331970215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6309313178062439, + "rewards_train/margins": -0.15895196795463562, + "rewards_train/rejected": -0.4719793498516083, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -11.489873886108398, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -19.060222625732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1927373856306076, + "rewards_train/margins": 0.1445348709821701, + "rewards_train/rejected": -0.3372722566127777, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.609909176826477, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -1.59375, + "logps_train/rejected": -1.3542438745498657, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.043803419917821884, + "rewards_train/margins": -0.06775403209030628, + "rewards_train/rejected": 0.023950612172484398, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -73.85604095458984, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -132.264404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48560410737991333, + "rewards_train/margins": 2.7408363223075867, + "rewards_train/rejected": -3.2264404296875, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -175.01797485351562, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -215.067626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7017974853515625, + "rewards_train/margins": 2.704965114593506, + "rewards_train/rejected": -6.406762599945068, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -72.47582244873047, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -88.8500747680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7975822687149048, + "rewards_train/margins": 0.1374252438545227, + "rewards_train/rejected": -0.9350075125694275, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -128.12689208984375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -198.3466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3126892149448395, + "rewards_train/margins": 4.521978944540024, + "rewards_train/rejected": -4.834668159484863, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -88.01485443115234, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -147.63040161132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30148544907569885, + "rewards_train/margins": 3.211554855108261, + "rewards_train/rejected": -3.51304030418396, + "step": 438 + }, + { + "epoch": 0.12, + "logps_train/chosen": -12.652740478515625, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -18.158845901489258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31597596406936646, + "rewards_train/margins": 0.544360563158989, + "rewards_train/rejected": -0.2283845990896225, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -42.37655258178711, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -47.125518798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33765527606010437, + "rewards_train/margins": 1.3748966753482819, + "rewards_train/rejected": -1.7125519514083862, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.524658203125, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -3.8697168827056885, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0899658203125, + "rewards_train/margins": 0.0032558664679527283, + "rewards_train/rejected": -0.09322168678045273, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -146.33038330078125, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -177.96420288085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.083038330078125, + "rewards_train/margins": 2.113381862640381, + "rewards_train/rejected": -6.196420192718506, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -15.928251266479492, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -28.51680564880371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05717487260699272, + "rewards_train/margins": 0.5838554613292217, + "rewards_train/rejected": -0.526680588722229, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -28.810335159301758, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -14.56895637512207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7685335278511047, + "rewards_train/margins": -0.3678878843784332, + "rewards_train/rejected": -0.4006456434726715, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -63.9565544128418, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -34.74603271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0706554427742958, + "rewards_train/margins": 0.39144783467054367, + "rewards_train/rejected": -0.4621032774448395, + "step": 439 + }, + { + "epoch": 0.12, + "logps_train/chosen": -75.06153869628906, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -163.22164916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3061538636684418, + "rewards_train/margins": 3.66601100564003, + "rewards_train/rejected": -3.9721648693084717, + "step": 439 + }, + { + "epoch": 0.12, + "learning_rate": 1.9144399391799043e-06, + "loss": 0.394, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -58.29280471801758, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -111.48292541503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.670719563961029, + "rewards_train/margins": 1.6190121173858643, + "rewards_train/rejected": -0.9482925534248352, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.4122631549835205, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -6.051251411437988, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01658618450164795, + "rewards_train/margins": 0.17483632266521454, + "rewards_train/rejected": -0.1582501381635666, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -52.89386749267578, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -3.2637276649475098, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11438675224781036, + "rewards_train/margins": -0.022388987243175507, + "rewards_train/rejected": -0.09199776500463486, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -72.39921569824219, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -162.35528564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2899215817451477, + "rewards_train/margins": 3.745607078075409, + "rewards_train/rejected": -4.035528659820557, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -104.97676086425781, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -186.20751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3976760804653168, + "rewards_train/margins": 2.7230760157108307, + "rewards_train/rejected": -3.1207520961761475, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -32.98345184326172, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -83.80467224121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3733452558517456, + "rewards_train/margins": -0.042878031730651855, + "rewards_train/rejected": -1.3304672241210938, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -193.63525390625, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -159.84251403808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.863525390625, + "rewards_train/margins": -0.7792739868164062, + "rewards_train/rejected": -1.0842514038085938, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -117.34719848632812, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -164.70913696289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2847198247909546, + "rewards_train/margins": 0.28619384765625, + "rewards_train/rejected": -1.5709136724472046, + "step": 440 + }, + { + "epoch": 0.12, + "logps_train/chosen": -95.00154113769531, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -160.443603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.950154185295105, + "rewards_train/margins": 0.9942063093185425, + "rewards_train/rejected": -2.9443604946136475, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -0.7105062007904053, + "logps_train/ref_chosen": -0.3671875, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -7.741601467132568, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03433186933398247, + "rewards_train/margins": 0.34139077737927437, + "rewards_train/rejected": -0.37572264671325684, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -98.71468353271484, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -167.61932373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021468354389071465, + "rewards_train/margins": 2.640464114025235, + "rewards_train/rejected": -2.6619324684143066, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -18.098859786987305, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -16.057998657226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12761402130126953, + "rewards_train/margins": 0.327163890004158, + "rewards_train/rejected": -0.1995498687028885, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -92.64743041992188, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -110.0563735961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26474305987358093, + "rewards_train/margins": 0.9408942759037018, + "rewards_train/rejected": -1.2056373357772827, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -66.70355224609375, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -132.65789794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3796447813510895, + "rewards_train/margins": 0.9454346001148224, + "rewards_train/rejected": -0.5657898187637329, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.97654914855957, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -25.564125061035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16640491783618927, + "rewards_train/margins": 1.1400076597929, + "rewards_train/rejected": -1.3064125776290894, + "step": 441 + }, + { + "epoch": 0.12, + "logps_train/chosen": -68.17312622070312, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -87.68016815185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21731261909008026, + "rewards_train/margins": 0.6007042080163956, + "rewards_train/rejected": -0.8180168271064758, + "step": 441 + }, + { + "epoch": 0.12, + "learning_rate": 1.9133660283747424e-06, + "loss": 0.4278, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -18.374086380004883, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -12.424093246459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22490863502025604, + "rewards_train/margins": 0.642500713467598, + "rewards_train/rejected": -0.867409348487854, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -134.79556274414062, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -159.2001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0795562267303467, + "rewards_train/margins": 1.1904633045196533, + "rewards_train/rejected": -4.27001953125, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -10.783161163330078, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -31.91415786743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015433884225785732, + "rewards_train/margins": 0.3693496650084853, + "rewards_train/rejected": -0.3539157807826996, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -75.99235534667969, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -124.6574478149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7992355227470398, + "rewards_train/margins": 0.46650928258895874, + "rewards_train/rejected": -1.2657448053359985, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -87.28517150878906, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -256.6085510253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5285171866416931, + "rewards_train/margins": 5.932337820529938, + "rewards_train/rejected": -6.460855007171631, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -146.22190856933594, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -159.68695068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0221909284591675, + "rewards_train/margins": 0.8465042114257812, + "rewards_train/rejected": -1.8686951398849487, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -25.134368896484375, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -26.317398071289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12406311184167862, + "rewards_train/margins": 0.4433029368519783, + "rewards_train/rejected": -0.3192398250102997, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -1.8412812948226929, + "logps_train/ref_chosen": -1.90625, + "logps_train/ref_rejected": -0.9453125, + "logps_train/rejected": -1.628726840019226, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006496870424598455, + "rewards_train/margins": 0.07483830442652106, + "rewards_train/rejected": -0.06834143400192261, + "step": 442 + }, + { + "epoch": 0.12, + "logps_train/chosen": -113.0496826171875, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -79.19801330566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.554968237876892, + "rewards_train/margins": -0.835166871547699, + "rewards_train/rejected": -0.7198013663291931, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -172.30540466308594, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -191.02005004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.630540609359741, + "rewards_train/margins": 1.5714643001556396, + "rewards_train/rejected": -4.202004909515381, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -97.19729614257812, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -162.4152069091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9197296500205994, + "rewards_train/margins": 2.5217910408973694, + "rewards_train/rejected": -3.4415206909179688, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -85.22604370117188, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -110.83181762695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9726043939590454, + "rewards_train/margins": 1.8105775117874146, + "rewards_train/rejected": -2.78318190574646, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -107.19908905029297, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -169.52957153320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2699089050292969, + "rewards_train/margins": 3.783048152923584, + "rewards_train/rejected": -4.052957057952881, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.78731918334961, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -23.966957092285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.17126809060573578, + "rewards_train/margins": -0.019536197185516357, + "rewards_train/rejected": 0.19080428779125214, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -78.72127532958984, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -125.31193542480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3778724670410156, + "rewards_train/margins": 2.6090660095214844, + "rewards_train/rejected": -2.2311935424804688, + "step": 443 + }, + { + "epoch": 0.12, + "logps_train/chosen": -40.72690200805664, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -12.745712280273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22269020974636078, + "rewards_train/margins": 0.608131006360054, + "rewards_train/rejected": -0.8308212161064148, + "step": 443 + }, + { + "epoch": 0.12, + "learning_rate": 1.9122857249757703e-06, + "loss": 0.3788, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -59.270912170410156, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -28.100751876831055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27709123492240906, + "rewards_train/margins": 0.7704840004444122, + "rewards_train/rejected": -1.0475752353668213, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -78.12905883789062, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -53.212806701660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9629058837890625, + "rewards_train/margins": -0.2916252017021179, + "rewards_train/rejected": -0.6712806820869446, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -3.843266487121582, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -6.099526405334473, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12026415020227432, + "rewards_train/margins": 0.2225010022521019, + "rewards_train/rejected": -0.3427651524543762, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -183.52011108398438, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -150.83169555664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6520111560821533, + "rewards_train/margins": -0.31884145736694336, + "rewards_train/rejected": -3.33316969871521, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -8.639245986938477, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -10.651049613952637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.061075400561094284, + "rewards_train/margins": 0.2636803649365902, + "rewards_train/rejected": -0.2026049643754959, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -191.83853149414062, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -173.36143493652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.383853435516357, + "rewards_train/margins": -0.7977099418640137, + "rewards_train/rejected": -6.586143493652344, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -187.7746124267578, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -186.28900146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.277461290359497, + "rewards_train/margins": 3.951439142227173, + "rewards_train/rejected": -5.22890043258667, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -153.3046875, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -127.03192138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6304687857627869, + "rewards_train/margins": 3.2727234959602356, + "rewards_train/rejected": -3.9031922817230225, + "step": 444 + }, + { + "epoch": 0.12, + "logps_train/chosen": -13.214860916137695, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -12.39721965789795, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02851390838623047, + "rewards_train/margins": 0.6026108860969543, + "rewards_train/rejected": -0.5740969777107239, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -30.161272048950195, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -37.928802490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8661271929740906, + "rewards_train/margins": 0.36425310373306274, + "rewards_train/rejected": -1.2303802967071533, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -116.09443664550781, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -121.02247619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8094436526298523, + "rewards_train/margins": 0.9428040385246277, + "rewards_train/rejected": -1.75224769115448, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -16.787851333618164, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -20.11856460571289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19753514230251312, + "rewards_train/margins": 0.16432131826877594, + "rewards_train/rejected": -0.36185646057128906, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -9.606891632080078, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -3.8449041843414307, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22631417214870453, + "rewards_train/margins": -0.046511247754096985, + "rewards_train/rejected": -0.17980292439460754, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -99.95327758789062, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -133.5490264892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7453277111053467, + "rewards_train/margins": 0.809575080871582, + "rewards_train/rejected": -3.5549027919769287, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -90.19266510009766, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -179.43472290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9692665338516235, + "rewards_train/margins": 4.274206042289734, + "rewards_train/rejected": -6.243472576141357, + "step": 445 + }, + { + "epoch": 0.12, + "logps_train/chosen": -73.39517211914062, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -104.97749328613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2604827880859375, + "rewards_train/margins": 0.9582321047782898, + "rewards_train/rejected": -0.6977493166923523, + "step": 445 + }, + { + "epoch": 0.12, + "learning_rate": 1.9111990365439653e-06, + "loss": 0.4878, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -37.57528305053711, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -75.06550598144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20752830803394318, + "rewards_train/margins": 1.9740224331617355, + "rewards_train/rejected": -2.1815507411956787, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -33.51466369628906, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -16.230880737304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7264663577079773, + "rewards_train/margins": -0.2283782660961151, + "rewards_train/rejected": -0.4980880916118622, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -10.76193618774414, + "logps_train/ref_chosen": -0.09521484375, + "logps_train/ref_rejected": -0.09521484375, + "logps_train/rejected": -11.013529777526855, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0666722059249878, + "rewards_train/margins": 0.025159358978271484, + "rewards_train/rejected": -1.0918315649032593, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -163.21697998046875, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -215.33245849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.321697950363159, + "rewards_train/margins": 3.6115481853485107, + "rewards_train/rejected": -6.93324613571167, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -2.650022029876709, + "logps_train/ref_chosen": -1.875, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -10.616704940795898, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07750220596790314, + "rewards_train/margins": 0.20291830599308014, + "rewards_train/rejected": -0.2804205119609833, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -72.28964233398438, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -119.67903900146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0289642810821533, + "rewards_train/margins": 1.9389395713806152, + "rewards_train/rejected": -2.9679038524627686, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -15.273157119750977, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -27.671142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5210657119750977, + "rewards_train/margins": 0.12104856967926025, + "rewards_train/rejected": -0.6421142816543579, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -180.94161987304688, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -230.06878662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.294162273406982, + "rewards_train/margins": 0.3127164840698242, + "rewards_train/rejected": -4.606878757476807, + "step": 446 + }, + { + "epoch": 0.12, + "logps_train/chosen": -120.6225814819336, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -135.622802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.512258291244507, + "rewards_train/margins": 1.0500218868255615, + "rewards_train/rejected": -4.562280178070068, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -25.3882999420166, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -0.80859375, + "logps_train/rejected": -1.8013525009155273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13617001473903656, + "rewards_train/margins": 0.23544589430093765, + "rewards_train/rejected": -0.09927587956190109, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -24.137969970703125, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -9.405117988586426, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1887969970703125, + "rewards_train/margins": 0.31108981370925903, + "rewards_train/rejected": -0.49988681077957153, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -99.56527709960938, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -26.551959991455078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7565277218818665, + "rewards_train/margins": -0.5013317167758942, + "rewards_train/rejected": -0.2551960051059723, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -61.17943572998047, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -70.6761703491211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.082056425511837, + "rewards_train/margins": 0.8246734961867332, + "rewards_train/rejected": -0.7426170706748962, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -5.379275321960449, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -10.138028144836426, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08167753368616104, + "rewards_train/margins": -0.030374716967344284, + "rewards_train/rejected": -0.05130281671881676, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -119.56320190429688, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -137.5393524169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5563201904296875, + "rewards_train/margins": 2.197615146636963, + "rewards_train/rejected": -3.7539353370666504, + "step": 447 + }, + { + "epoch": 0.12, + "logps_train/chosen": -16.274715423583984, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -24.43172836303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1087215468287468, + "rewards_train/margins": 0.6969512775540352, + "rewards_train/rejected": -0.805672824382782, + "step": 447 + }, + { + "epoch": 0.13, + "learning_rate": 1.9101059706849955e-06, + "loss": 0.4719, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -147.175048828125, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -149.96240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6675050258636475, + "rewards_train/margins": -0.22126483917236328, + "rewards_train/rejected": -3.446240186691284, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.253754138946533, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -12.078985214233398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0472504161298275, + "rewards_train/margins": 0.048148106783628464, + "rewards_train/rejected": -0.09539852291345596, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -12.849907875061035, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -12.08060359954834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07750921696424484, + "rewards_train/margins": 0.4230695888400078, + "rewards_train/rejected": -0.34556037187576294, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -143.9907989501953, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -181.0467987060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.249079942703247, + "rewards_train/margins": 2.0555999279022217, + "rewards_train/rejected": -4.304679870605469, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -136.91783142089844, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -161.99221801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6917831897735596, + "rewards_train/margins": 2.857438802719116, + "rewards_train/rejected": -5.549221992492676, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -5.518972396850586, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -20.90058708190918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06127224117517471, + "rewards_train/margins": -0.2712135389447212, + "rewards_train/rejected": 0.2099412977695465, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -121.60636901855469, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -130.60342407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2606368958950043, + "rewards_train/margins": 1.899705559015274, + "rewards_train/rejected": -2.1603424549102783, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -125.47003936767578, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -141.34991455078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6470038890838623, + "rewards_train/margins": -0.5620124340057373, + "rewards_train/rejected": -2.084991455078125, + "step": 448 + }, + { + "epoch": 0.13, + "logps_train/chosen": -171.59378051757812, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -205.1104736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.1593780517578125, + "rewards_train/margins": 3.151669502258301, + "rewards_train/rejected": -7.311047554016113, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -174.65090942382812, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -284.9498291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.165091037750244, + "rewards_train/margins": 6.129891872406006, + "rewards_train/rejected": -11.29498291015625, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.4970335960388184, + "logps_train/ref_chosen": -1.703125, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -3.529787540435791, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17939086258411407, + "rewards_train/margins": -0.09984960407018661, + "rewards_train/rejected": -0.07954125851392746, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -180.80064392089844, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -195.7862091064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3800643980503082, + "rewards_train/margins": 1.5985564887523651, + "rewards_train/rejected": -1.9786208868026733, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -18.460628509521484, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -13.617094039916992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1960628479719162, + "rewards_train/margins": 0.2968965619802475, + "rewards_train/rejected": -0.4929594099521637, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.5212974548339844, + "logps_train/ref_chosen": -1.65625, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -35.954071044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0865047499537468, + "rewards_train/margins": 0.20890236645936966, + "rewards_train/rejected": -0.29540711641311646, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -34.97037124633789, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -65.31539154052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.972037136554718, + "rewards_train/margins": 0.4345020651817322, + "rewards_train/rejected": -1.4065392017364502, + "step": 449 + }, + { + "epoch": 0.13, + "logps_train/chosen": -77.82599639892578, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -173.34950256347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2325996160507202, + "rewards_train/margins": 5.652350544929504, + "rewards_train/rejected": -6.884950160980225, + "step": 449 + }, + { + "epoch": 0.13, + "learning_rate": 1.9090065350491626e-06, + "loss": 0.4237, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -91.30757141113281, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -93.26407623291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8807571530342102, + "rewards_train/margins": 1.4456505179405212, + "rewards_train/rejected": -2.3264076709747314, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.48087739944458, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.628049373626709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019962741062045097, + "rewards_train/margins": 0.01471719704568386, + "rewards_train/rejected": -0.03467993810772896, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -33.076499938964844, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -110.6304931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4201500117778778, + "rewards_train/margins": 2.792899399995804, + "rewards_train/rejected": -3.2130494117736816, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -26.836421966552734, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -35.28165054321289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4961422085762024, + "rewards_train/margins": -0.0929771363735199, + "rewards_train/rejected": -0.4031650722026825, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -69.97904968261719, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -88.24369812011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19790497422218323, + "rewards_train/margins": 0.5764648616313934, + "rewards_train/rejected": -0.7743698358535767, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -8.539949417114258, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -19.15304183959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.003994941711425781, + "rewards_train/margins": 0.5675592422485352, + "rewards_train/rejected": -0.5715541839599609, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -163.7628173828125, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -187.43218994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.976281762123108, + "rewards_train/margins": 2.4669374227523804, + "rewards_train/rejected": -4.443219184875488, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -17.30584144592285, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -67.81214904785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.880584180355072, + "rewards_train/margins": 1.5006306767463684, + "rewards_train/rejected": -2.3812148571014404, + "step": 450 + }, + { + "epoch": 0.13, + "logps_train/chosen": -89.49263763427734, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -83.84197998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3007362484931946, + "rewards_train/margins": 1.0349342823028564, + "rewards_train/rejected": -0.7341980338096619, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -24.381080627441406, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -54.527339935302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16310806572437286, + "rewards_train/margins": 1.0146259516477585, + "rewards_train/rejected": -1.1777340173721313, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.974797248840332, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -6.698268890380859, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17560473084449768, + "rewards_train/margins": 0.1770346760749817, + "rewards_train/rejected": -0.35263940691947937, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -132.6693115234375, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -170.89361572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.966931104660034, + "rewards_train/margins": 1.772430658340454, + "rewards_train/rejected": -4.739361763000488, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -157.74090576171875, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -175.48178100585938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3740906715393066, + "rewards_train/margins": -0.925912618637085, + "rewards_train/rejected": -2.4481780529022217, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -18.733108520507812, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -20.72479820251465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06081085279583931, + "rewards_train/margins": 0.16166896745562553, + "rewards_train/rejected": -0.22247982025146484, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -168.28521728515625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -154.866943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7285218238830566, + "rewards_train/margins": 0.408172607421875, + "rewards_train/rejected": -4.136694431304932, + "step": 451 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.9900383949279785, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -11.901142120361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4365038573741913, + "rewards_train/margins": 0.18173536658287048, + "rewards_train/rejected": -0.6182392239570618, + "step": 451 + }, + { + "epoch": 0.13, + "learning_rate": 1.90790073733135e-06, + "loss": 0.4528, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -34.106590270996094, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -8.437060356140137, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.035659026354551315, + "rewards_train/margins": 0.5502345450222492, + "rewards_train/rejected": -0.5858935713768005, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.666363000869751, + "logps_train/ref_chosen": -0.609375, + "logps_train/ref_rejected": -0.609375, + "logps_train/rejected": -0.6659039258956909, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.005698800086975098, + "rewards_train/margins": -4.5907218009233475e-05, + "rewards_train/rejected": -0.005652892868965864, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -115.38683319091797, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -127.44154357910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8386833667755127, + "rewards_train/margins": 1.5554709434509277, + "rewards_train/rejected": -3.3941543102264404, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -189.93350219726562, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -194.7802734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.693350315093994, + "rewards_train/margins": -0.41532278060913086, + "rewards_train/rejected": -5.278027534484863, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -101.90550231933594, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -178.96780395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3405503034591675, + "rewards_train/margins": 5.156230092048645, + "rewards_train/rejected": -6.4967803955078125, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -16.884897232055664, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -50.29192352294922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5759897232055664, + "rewards_train/margins": -0.7967973798513412, + "rewards_train/rejected": 0.22080765664577484, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.502184271812439, + "logps_train/ref_chosen": -0.97265625, + "logps_train/ref_rejected": -1.0859375, + "logps_train/rejected": -1.9083603620529175, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.052952803671360016, + "rewards_train/margins": 0.02928948402404785, + "rewards_train/rejected": -0.08224228769540787, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -107.39009857177734, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -131.6715087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4890098571777344, + "rewards_train/margins": 1.1781409978866577, + "rewards_train/rejected": -1.667150855064392, + "step": 452 + }, + { + "epoch": 0.13, + "logps_train/chosen": -137.0240478515625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -149.6924591064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.052404880523682, + "rewards_train/margins": 1.766840934753418, + "rewards_train/rejected": -5.8192458152771, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -13.298364639282227, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -16.14764404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004836463835090399, + "rewards_train/margins": 0.3724279464222491, + "rewards_train/rejected": -0.3772644102573395, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -126.10325622558594, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -205.34097290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010325622744858265, + "rewards_train/margins": 4.923771667294204, + "rewards_train/rejected": -4.9340972900390625, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -11.057547569274902, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -11.120665550231934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37762975692749023, + "rewards_train/margins": 0.11881181597709656, + "rewards_train/rejected": -0.4964415729045868, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -5.655044078826904, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -12.378866195678711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25300440192222595, + "rewards_train/margins": -0.3901177793741226, + "rewards_train/rejected": 0.13711337745189667, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -19.37779998779297, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -1.109375, + "logps_train/rejected": -9.955557823181152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.32471999526023865, + "rewards_train/margins": 1.2093382775783539, + "rewards_train/rejected": -0.8846182823181152, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.6267240047454834, + "logps_train/ref_chosen": -0.40234375, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -2.1865170001983643, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02243802510201931, + "rewards_train/margins": -0.08503632806241512, + "rewards_train/rejected": 0.06259830296039581, + "step": 453 + }, + { + "epoch": 0.13, + "logps_train/chosen": -104.9389877319336, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -9.939411163330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2938987910747528, + "rewards_train/margins": 0.4219173491001129, + "rewards_train/rejected": -0.7158161401748657, + "step": 453 + }, + { + "epoch": 0.13, + "learning_rate": 1.9067885852709693e-06, + "loss": 0.5086, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.6154072284698486, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -2.09375, + "logps_train/rejected": -4.66728401184082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006853222846984863, + "rewards_train/margins": 0.2505001723766327, + "rewards_train/rejected": -0.25735339522361755, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -19.980493545532227, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -35.60547637939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11445064842700958, + "rewards_train/margins": 0.8249982744455338, + "rewards_train/rejected": -0.7105476260185242, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -94.66819763183594, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -111.23088073730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01681976392865181, + "rewards_train/margins": 0.6562683098018169, + "rewards_train/rejected": -0.6730880737304688, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -180.62171936035156, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -260.5641174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.762171983718872, + "rewards_train/margins": 7.094239950180054, + "rewards_train/rejected": -9.856411933898926, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -19.146190643310547, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -60.300445556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1478809416294098, + "rewards_train/margins": 1.227925568819046, + "rewards_train/rejected": -1.0800446271896362, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -72.41661071777344, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -86.0913314819336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5583389401435852, + "rewards_train/margins": 0.2674720883369446, + "rewards_train/rejected": 0.2908668518066406, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -13.85115909576416, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -12.552448272705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0773840919137001, + "rewards_train/margins": 0.36387891322374344, + "rewards_train/rejected": -0.28649482131004333, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -4.115243434906006, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -2.8898491859436035, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06777434796094894, + "rewards_train/margins": -0.017851930111646652, + "rewards_train/rejected": -0.04992241784930229, + "step": 454 + }, + { + "epoch": 0.13, + "logps_train/chosen": -58.88154602050781, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -59.689361572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2618454098701477, + "rewards_train/margins": 0.05578155815601349, + "rewards_train/rejected": 0.20606385171413422, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.228952407836914, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -16.353439331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026801491156220436, + "rewards_train/margins": 0.027292443439364433, + "rewards_train/rejected": -0.05409393459558487, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -11.243709564208984, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -28.557937622070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11187095940113068, + "rewards_train/margins": 0.10642281174659729, + "rewards_train/rejected": -0.21829377114772797, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.234609127044678, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -4.505634307861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08278908580541611, + "rewards_train/margins": 0.2708525136113167, + "rewards_train/rejected": -0.18806342780590057, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.1193349361419678, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -32.73662185668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09943349659442902, + "rewards_train/margins": 1.0867287367582321, + "rewards_train/rejected": -1.1861622333526611, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.7961549162864685, + "logps_train/ref_chosen": -0.78515625, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -38.72380447387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0010998666984960437, + "rewards_train/margins": 0.5962805926101282, + "rewards_train/rejected": -0.5973804593086243, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -134.9463348388672, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -143.1748504638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.494633436203003, + "rewards_train/margins": 0.32285165786743164, + "rewards_train/rejected": -2.8174850940704346, + "step": 455 + }, + { + "epoch": 0.13, + "logps_train/chosen": -8.91971492767334, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -16.490753173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1232214942574501, + "rewards_train/margins": 0.07585383206605911, + "rewards_train/rejected": -0.19907532632350922, + "step": 455 + }, + { + "epoch": 0.13, + "learning_rate": 1.9056700866519061e-06, + "loss": 0.4937, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -98.98228454589844, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -75.2240982055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.8017715811729431, + "rewards_train/margins": 1.0741814076900482, + "rewards_train/rejected": -0.2724098265171051, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -87.87543487548828, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -103.42619323730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18754349648952484, + "rewards_train/margins": 1.9550758749246597, + "rewards_train/rejected": -2.1426193714141846, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -163.39352416992188, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -104.6941909790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4393523931503296, + "rewards_train/margins": 0.38006675243377686, + "rewards_train/rejected": -1.8194191455841064, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -152.03994750976562, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -160.89236450195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5039948225021362, + "rewards_train/margins": 0.8852416276931763, + "rewards_train/rejected": -2.3892364501953125, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -114.51559448242188, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -114.6839370727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20155945420265198, + "rewards_train/margins": 0.016834259033203125, + "rewards_train/rejected": -0.2183937132358551, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -136.86026000976562, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -177.96102905273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6860260367393494, + "rewards_train/margins": 2.5100768208503723, + "rewards_train/rejected": -3.1961028575897217, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -156.15744018554688, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -191.9101104736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.365744113922119, + "rewards_train/margins": 3.3252668380737305, + "rewards_train/rejected": -6.69101095199585, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -56.58723449707031, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -130.3149871826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10872345417737961, + "rewards_train/margins": 0.2727752700448036, + "rewards_train/rejected": -0.3814987242221832, + "step": 456 + }, + { + "epoch": 0.13, + "logps_train/chosen": -9.924446105957031, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -8.567323684692383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.048694610595703125, + "rewards_train/margins": 0.23616275191307068, + "rewards_train/rejected": -0.2848573625087738, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -136.41912841796875, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -117.7007064819336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.041913032531738, + "rewards_train/margins": -2.371842384338379, + "rewards_train/rejected": -2.6700706481933594, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.1774992942810059, + "logps_train/ref_chosen": -1.71875, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -4.213464736938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.054125070571899414, + "rewards_train/margins": 0.12078404426574707, + "rewards_train/rejected": -0.06665897369384766, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -146.5421905517578, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -162.5690155029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.454219102859497, + "rewards_train/margins": 2.552682638168335, + "rewards_train/rejected": -6.006901741027832, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -64.28699493408203, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -128.86544799804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4786995053291321, + "rewards_train/margins": 1.9078454375267029, + "rewards_train/rejected": -2.386544942855835, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -20.33615493774414, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -17.403331756591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12111549824476242, + "rewards_train/margins": 0.1817176714539528, + "rewards_train/rejected": -0.3028331696987152, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -11.391392707824707, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -17.066585540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1016392707824707, + "rewards_train/margins": 0.41126930713653564, + "rewards_train/rejected": -0.5129085779190063, + "step": 457 + }, + { + "epoch": 0.13, + "logps_train/chosen": -127.38027954101562, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -125.17338562011719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.43802809715271, + "rewards_train/margins": -0.2206895351409912, + "rewards_train/rejected": -3.2173385620117188, + "step": 457 + }, + { + "epoch": 0.13, + "learning_rate": 1.904545249302465e-06, + "loss": 0.5295, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -105.71859741210938, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -115.00376892089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3281402587890625, + "rewards_train/margins": 2.328517198562622, + "rewards_train/rejected": -2.0003769397735596, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.996720314025879, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -5.0284624099731445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1871720403432846, + "rewards_train/margins": -0.27182579785585403, + "rewards_train/rejected": 0.08465375751256943, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -42.120758056640625, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -30.75411033630371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.362924188375473, + "rewards_train/margins": 0.6883352398872375, + "rewards_train/rejected": -0.3254110515117645, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -215.75869750976562, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -194.2263641357422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0758697986602783, + "rewards_train/margins": -1.3532333374023438, + "rewards_train/rejected": -1.7226364612579346, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -217.59600830078125, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -142.3240509033203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.159600734710693, + "rewards_train/margins": -0.8771955966949463, + "rewards_train/rejected": -3.282405138015747, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -158.5601806640625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -160.99554443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9560182094573975, + "rewards_train/margins": 0.3935363292694092, + "rewards_train/rejected": -3.3495545387268066, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -28.88064956665039, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -28.354660034179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.650564968585968, + "rewards_train/margins": 0.4224010109901428, + "rewards_train/rejected": -1.0729659795761108, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -121.1181640625, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -212.6783447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.711816430091858, + "rewards_train/margins": 5.756018042564392, + "rewards_train/rejected": -7.46783447265625, + "step": 458 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.7186801433563232, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -28.515560150146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00938198622316122, + "rewards_train/margins": 0.3609380191192031, + "rewards_train/rejected": -0.35155603289604187, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -65.75064849853516, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -75.40106201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6250648498535156, + "rewards_train/margins": 0.21504133939743042, + "rewards_train/rejected": -0.840106189250946, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.5320519804954529, + "logps_train/ref_chosen": -0.1865234375, + "logps_train/ref_rejected": -0.1865234375, + "logps_train/rejected": -0.5267940759658813, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03455285355448723, + "rewards_train/margins": -0.0005257874727249146, + "rewards_train/rejected": -0.034027066081762314, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -14.881048202514648, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -22.39991569519043, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7943548560142517, + "rewards_train/margins": -0.20436328649520874, + "rewards_train/rejected": -0.589991569519043, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.5625431537628174, + "logps_train/ref_chosen": -1.796875, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -10.230436325073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07656681537628174, + "rewards_train/margins": 0.2089768350124359, + "rewards_train/rejected": -0.28554365038871765, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -97.0167236328125, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -130.20492553710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.751672387123108, + "rewards_train/margins": 0.41882026195526123, + "rewards_train/rejected": -2.170492649078369, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -143.221923828125, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -169.57415771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3221924304962158, + "rewards_train/margins": 5.885223627090454, + "rewards_train/rejected": -7.20741605758667, + "step": 459 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.6450932621955872, + "logps_train/ref_chosen": -0.75390625, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -3.4844775199890137, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.010881299152970314, + "rewards_train/margins": -0.3406709488481283, + "rewards_train/rejected": 0.35155224800109863, + "step": 459 + }, + { + "epoch": 0.13, + "learning_rate": 1.9034140810953147e-06, + "loss": 0.6104, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -160.37478637695312, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -165.85829162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7374786734580994, + "rewards_train/margins": 2.1483505368232727, + "rewards_train/rejected": -2.885829210281372, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -110.82052612304688, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -222.62191772460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0320526361465454, + "rewards_train/margins": 2.5301390886306763, + "rewards_train/rejected": -3.5621917247772217, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -31.749711990356445, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -45.17698287963867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29997119307518005, + "rewards_train/margins": 0.242727130651474, + "rewards_train/rejected": -0.542698323726654, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -24.412551879882812, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -59.51630783081055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.233744814991951, + "rewards_train/margins": 1.4103756695985794, + "rewards_train/rejected": -1.1766308546066284, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -14.019774436950684, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -16.084802627563477, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2332274466753006, + "rewards_train/margins": -0.33724718540906906, + "rewards_train/rejected": 0.10401973873376846, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -17.234214782714844, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -21.651416778564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07342147827148438, + "rewards_train/margins": 0.566720187664032, + "rewards_train/rejected": -0.6401416659355164, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.357628345489502, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -13.237743377685547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18576283752918243, + "rewards_train/margins": -0.10573849827051163, + "rewards_train/rejected": -0.0800243392586708, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.48741623759269714, + "logps_train/ref_chosen": -0.75390625, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -3.7039976119995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026649001985788345, + "rewards_train/margins": 0.13454876467585564, + "rewards_train/rejected": -0.10789976269006729, + "step": 460 + }, + { + "epoch": 0.13, + "logps_train/chosen": -86.47337341308594, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -142.23318481445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3473373353481293, + "rewards_train/margins": 1.3759811222553253, + "rewards_train/rejected": -1.7233184576034546, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.4341390132904053, + "logps_train/ref_chosen": -1.4765625, + "logps_train/ref_rejected": -0.96875, + "logps_train/rejected": -0.6991630792617798, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09575765579938889, + "rewards_train/margins": -0.12271634861826897, + "rewards_train/rejected": 0.02695869281888008, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.9071857929229736, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -15.826423645019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05321858078241348, + "rewards_train/margins": 0.7731737717986107, + "rewards_train/rejected": -0.8263923525810242, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.0458433628082275, + "logps_train/ref_chosen": -0.78515625, + "logps_train/ref_rejected": -0.78515625, + "logps_train/rejected": -1.1024373769760132, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026068711653351784, + "rewards_train/margins": 0.005659403279423714, + "rewards_train/rejected": -0.0317281149327755, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -88.23944091796875, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -103.75437927246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.823944091796875, + "rewards_train/margins": 1.7014939785003662, + "rewards_train/rejected": -2.525438070297241, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -116.24038696289062, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -120.212646484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3740386962890625, + "rewards_train/margins": -0.2027740478515625, + "rewards_train/rejected": -2.1712646484375, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -220.435791015625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -247.22381591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.94357967376709, + "rewards_train/margins": 1.7788019180297852, + "rewards_train/rejected": -10.722381591796875, + "step": 461 + }, + { + "epoch": 0.13, + "logps_train/chosen": -208.21946716308594, + "logps_train/ref_chosen": -198.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -125.46131896972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0219467878341675, + "rewards_train/margins": 0.9741851091384888, + "rewards_train/rejected": -1.9961318969726562, + "step": 461 + }, + { + "epoch": 0.13, + "learning_rate": 1.9022765899474331e-06, + "loss": 0.4488, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -126.47944641113281, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -207.61895751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3479446470737457, + "rewards_train/margins": 8.31395110487938, + "rewards_train/rejected": -8.661895751953125, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -63.22783279418945, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -162.30682373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3477833271026611, + "rewards_train/margins": 4.682899236679077, + "rewards_train/rejected": -6.030682563781738, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -215.0529327392578, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -215.1898956298828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.305293560028076, + "rewards_train/margins": -2.0863037109375, + "rewards_train/rejected": -5.218989849090576, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -77.76689147949219, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -45.76423645019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6733108758926392, + "rewards_train/margins": 2.6997345685958862, + "rewards_train/rejected": -2.026423692703247, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -104.33345031738281, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -121.94122314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4833450317382812, + "rewards_train/margins": -0.1392226219177246, + "rewards_train/rejected": -2.3441224098205566, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -102.25504302978516, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -170.7942352294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5755043029785156, + "rewards_train/margins": 2.7039194107055664, + "rewards_train/rejected": -4.279423713684082, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -102.66116333007812, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -108.60328674316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5161163806915283, + "rewards_train/margins": 0.5442123413085938, + "rewards_train/rejected": -2.060328722000122, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -54.36772537231445, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -156.12387084960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28677254915237427, + "rewards_train/margins": 5.625614821910858, + "rewards_train/rejected": -5.912387371063232, + "step": 462 + }, + { + "epoch": 0.13, + "logps_train/chosen": -14.075936317443848, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -10.758355140686035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17990636825561523, + "rewards_train/margins": 0.9744918942451477, + "rewards_train/rejected": -0.7945855259895325, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -5.935725212097168, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -15.93661880493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1779475212097168, + "rewards_train/margins": 0.1094643771648407, + "rewards_train/rejected": -0.2874118983745575, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -8.131744384765625, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -13.188871383666992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006924438755959272, + "rewards_train/margins": 0.7432127115316689, + "rewards_train/rejected": -0.7501371502876282, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -119.78207397460938, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -173.59849548339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.278207391500473, + "rewards_train/margins": -0.1183578372001648, + "rewards_train/rejected": -0.15984955430030823, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -90.24195098876953, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -168.2102508544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6741951107978821, + "rewards_train/margins": 4.646830260753632, + "rewards_train/rejected": -5.321025371551514, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -123.48920440673828, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -197.28163146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3989204168319702, + "rewards_train/margins": 3.8292428255081177, + "rewards_train/rejected": -5.228163242340088, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -111.47067260742188, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -173.05169677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.047067403793335, + "rewards_train/margins": 1.8581023216247559, + "rewards_train/rejected": -3.905169725418091, + "step": 463 + }, + { + "epoch": 0.13, + "logps_train/chosen": -40.63311004638672, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -10.7110595703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8633109927177429, + "rewards_train/margins": -0.36095499992370605, + "rewards_train/rejected": -0.5023559927940369, + "step": 463 + }, + { + "epoch": 0.13, + "learning_rate": 1.9011327838200525e-06, + "loss": 0.4213, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -6.36022424697876, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -9.897375106811523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.395397424697876, + "rewards_train/margins": -0.14628490805625916, + "rewards_train/rejected": -0.24911251664161682, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.8001160621643066, + "logps_train/ref_chosen": -0.83203125, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -8.381101608276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0031915188301354647, + "rewards_train/margins": 0.3756766796577722, + "rewards_train/rejected": -0.3724851608276367, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -151.12100219726562, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -146.8946990966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.412100315093994, + "rewards_train/margins": 0.8273696899414062, + "rewards_train/rejected": -3.2394700050354004, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.2667080760002136, + "logps_train/ref_chosen": -0.369140625, + "logps_train/ref_rejected": -0.369140625, + "logps_train/rejected": -0.25798553228378296, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.010243254713714123, + "rewards_train/margins": -0.0008722543716430664, + "rewards_train/rejected": 0.01111550908535719, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -117.86186981201172, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -167.11318969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2861870527267456, + "rewards_train/margins": 2.1251319646835327, + "rewards_train/rejected": -3.4113190174102783, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -72.98066711425781, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -91.27671813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2980667054653168, + "rewards_train/margins": 0.479605108499527, + "rewards_train/rejected": -0.7776718139648438, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -233.18399047851562, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -225.50503540039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.618399620056152, + "rewards_train/margins": 0.6321039199829102, + "rewards_train/rejected": -9.250503540039062, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -126.37860870361328, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -232.17092895507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.887860894203186, + "rewards_train/margins": 6.4292320013046265, + "rewards_train/rejected": -7.3170928955078125, + "step": 464 + }, + { + "epoch": 0.13, + "logps_train/chosen": -94.216552734375, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -115.2770004272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4716552793979645, + "rewards_train/margins": 2.1060448586940765, + "rewards_train/rejected": -2.577700138092041, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -9.709959030151367, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -13.562623023986816, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27412089705467224, + "rewards_train/margins": 0.05714142322540283, + "rewards_train/rejected": -0.3312623202800751, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -51.019832611083984, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -37.041507720947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1019833087921143, + "rewards_train/margins": -0.26033252477645874, + "rewards_train/rejected": -0.8416507840156555, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -80.05343627929688, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -224.34457397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5053436160087585, + "rewards_train/margins": 5.629113972187042, + "rewards_train/rejected": -6.134457588195801, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -177.69387817382812, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -221.5417022705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.1693878173828125, + "rewards_train/margins": 0.8847823143005371, + "rewards_train/rejected": -6.05417013168335, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -27.258161544799805, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -15.150816917419434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.136683851480484, + "rewards_train/margins": 0.6080155372619629, + "rewards_train/rejected": -0.4713316857814789, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -128.3544921875, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -228.88592529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.785449206829071, + "rewards_train/margins": 6.603143513202667, + "rewards_train/rejected": -7.388592720031738, + "step": 465 + }, + { + "epoch": 0.13, + "logps_train/chosen": -66.32506561279297, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -96.25157165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8075065612792969, + "rewards_train/margins": 0.917650580406189, + "rewards_train/rejected": -1.7251571416854858, + "step": 465 + }, + { + "epoch": 0.13, + "learning_rate": 1.899982670718603e-06, + "loss": 0.3813, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -17.23065948486328, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -38.82551574707031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.629315972328186, + "rewards_train/margins": -0.19676437973976135, + "rewards_train/rejected": -0.4325515925884247, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -9.659607887268066, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -14.251121520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09028921276330948, + "rewards_train/margins": 0.45915136486291885, + "rewards_train/rejected": -0.3688621520996094, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -9.622028350830078, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -22.767305374145508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1815471649169922, + "rewards_train/margins": 0.8582777380943298, + "rewards_train/rejected": -0.6767305731773376, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -15.24876594543457, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -15.538250923156738, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.481126606464386, + "rewards_train/margins": -0.11480149626731873, + "rewards_train/rejected": -0.36632511019706726, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -124.4234848022461, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -138.2580108642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5423485040664673, + "rewards_train/margins": 0.283452570438385, + "rewards_train/rejected": -0.8258010745048523, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -53.49252700805664, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -24.59119987487793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4992526769638062, + "rewards_train/margins": -0.8776326775550842, + "rewards_train/rejected": -0.6216199994087219, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -18.805299758911133, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -18.959808349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0680299773812294, + "rewards_train/margins": 0.0154508575797081, + "rewards_train/rejected": -0.0834808349609375, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -16.950428009033203, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -20.38482666015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6575428247451782, + "rewards_train/margins": -0.2003101408481598, + "rewards_train/rejected": -0.45723268389701843, + "step": 466 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.881229877471924, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -4.235865592956543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019372988492250443, + "rewards_train/margins": 0.004213571548461914, + "rewards_train/rejected": -0.023586560040712357, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -100.46556091308594, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -124.37593078613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5534439086914062, + "rewards_train/margins": 2.1410369873046875, + "rewards_train/rejected": -1.5875930786132812, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -145.41635131835938, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -189.85406494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9416351318359375, + "rewards_train/margins": 4.143771648406982, + "rewards_train/rejected": -7.08540678024292, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -18.846765518188477, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -28.630413055419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1721765547990799, + "rewards_train/margins": 0.5408647507429123, + "rewards_train/rejected": -0.7130413055419922, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -158.403076171875, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -134.57693481445312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9403076171875, + "rewards_train/margins": -1.3826141357421875, + "rewards_train/rejected": 0.4423065185546875, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -24.93217658996582, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -20.162805557250977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.543217658996582, + "rewards_train/margins": -0.3144371062517166, + "rewards_train/rejected": -0.22878055274486542, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -141.42788696289062, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -176.67584228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.992788791656494, + "rewards_train/margins": 1.7747955322265625, + "rewards_train/rejected": -4.767584323883057, + "step": 467 + }, + { + "epoch": 0.13, + "logps_train/chosen": -111.78079223632812, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -111.44161987304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1280791759490967, + "rewards_train/margins": -0.03391718864440918, + "rewards_train/rejected": -2.0941619873046875, + "step": 467 + }, + { + "epoch": 0.13, + "learning_rate": 1.8988262586926566e-06, + "loss": 0.642, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -103.1549301147461, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -176.67581176757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5654930472373962, + "rewards_train/margins": 3.402088224887848, + "rewards_train/rejected": -3.967581272125244, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -92.91716003417969, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -169.8580780029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9917160272598267, + "rewards_train/margins": 7.344091773033142, + "rewards_train/rejected": -8.335807800292969, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.6823980808258057, + "logps_train/ref_chosen": -0.6875, + "logps_train/ref_rejected": -0.8125, + "logps_train/rejected": -1.1255781650543213, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0005101919523440301, + "rewards_train/margins": 0.03181800845777616, + "rewards_train/rejected": -0.03130781650543213, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -21.923748016357422, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -14.975812911987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2923748195171356, + "rewards_train/margins": 0.06145647168159485, + "rewards_train/rejected": -0.35383129119873047, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -4.831496238708496, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -28.391056060791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05814962461590767, + "rewards_train/margins": 0.4184559993445873, + "rewards_train/rejected": -0.476605623960495, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -13.142731666564941, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -12.548297882080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13572683930397034, + "rewards_train/margins": 0.15930662862956524, + "rewards_train/rejected": -0.023579789325594902, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -109.34491729736328, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -214.87478637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4844918251037598, + "rewards_train/margins": 5.952987194061279, + "rewards_train/rejected": -9.437479019165039, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.228910446166992, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -2.3848202228546143, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10179729759693146, + "rewards_train/margins": -0.1648777797818184, + "rewards_train/rejected": 0.06308048218488693, + "step": 468 + }, + { + "epoch": 0.13, + "logps_train/chosen": -57.26557159423828, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -20.888225555419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42655715346336365, + "rewards_train/margins": 0.06851541996002197, + "rewards_train/rejected": -0.4950725734233856, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -165.8516082763672, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -284.0, + "logps_train/rejected": -347.1186828613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.585160970687866, + "rewards_train/margins": 3.7267072200775146, + "rewards_train/rejected": -6.311868190765381, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -8.005593299865723, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -19.5705509185791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4114968478679657, + "rewards_train/margins": 0.2893082797527313, + "rewards_train/rejected": -0.700805127620697, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -68.47019958496094, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -40.08144760131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14701996743679047, + "rewards_train/margins": 0.8486247807741165, + "rewards_train/rejected": -0.995644748210907, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -56.2188835144043, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -94.90386962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1031116470694542, + "rewards_train/margins": 0.1934986114501953, + "rewards_train/rejected": -0.09038696438074112, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.5152320861816406, + "logps_train/ref_chosen": -1.75, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -12.17249870300293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07652320712804794, + "rewards_train/margins": -0.07177333673462272, + "rewards_train/rejected": -0.004749870393425226, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.990821599960327, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -7.0239057540893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0006446600309573114, + "rewards_train/margins": 0.15174592431867495, + "rewards_train/rejected": -0.15239058434963226, + "step": 469 + }, + { + "epoch": 0.13, + "logps_train/chosen": -110.16926574707031, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -186.37103271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5169265270233154, + "rewards_train/margins": 3.8701770305633545, + "rewards_train/rejected": -6.38710355758667, + "step": 469 + }, + { + "epoch": 0.13, + "learning_rate": 1.8976635558358721e-06, + "loss": 0.428, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -10.92322826385498, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -24.726064682006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5548228621482849, + "rewards_train/margins": 0.33028364181518555, + "rewards_train/rejected": -0.8851065039634705, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -140.59539794921875, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -72.8209457397461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1595399379730225, + "rewards_train/margins": -0.702445387840271, + "rewards_train/rejected": -1.4570945501327515, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.6385622024536133, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -15.492208480834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09744997322559357, + "rewards_train/margins": 0.15177087485790253, + "rewards_train/rejected": -0.2492208480834961, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -51.48078536987305, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -88.49357604980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.12692146003246307, + "rewards_train/margins": -0.023720934987068176, + "rewards_train/rejected": 0.15064239501953125, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -31.010272979736328, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -60.45187759399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6760272979736328, + "rewards_train/margins": 0.369160532951355, + "rewards_train/rejected": -1.0451878309249878, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -141.50015258789062, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -169.26290893554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7500152587890625, + "rewards_train/margins": 1.5762755870819092, + "rewards_train/rejected": -3.3262908458709717, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -39.02434158325195, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -63.76620101928711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9024341702461243, + "rewards_train/margins": -0.22581404447555542, + "rewards_train/rejected": -0.6766201257705688, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.154524564743042, + "logps_train/ref_chosen": -2.796875, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -8.33591365814209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06423504650592804, + "rewards_train/margins": 0.25407642126083374, + "rewards_train/rejected": -0.1898413747549057, + "step": 470 + }, + { + "epoch": 0.13, + "logps_train/chosen": -10.176851272583008, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -9.50288200378418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14893512427806854, + "rewards_train/margins": 0.2638530880212784, + "rewards_train/rejected": -0.4127882122993469, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -72.99653625488281, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -119.12272644042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.774653673171997, + "rewards_train/margins": 1.1876189708709717, + "rewards_train/rejected": -2.9622726440429688, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -174.94322204589844, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -137.84808349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.894322156906128, + "rewards_train/margins": 0.9904863834381104, + "rewards_train/rejected": -4.884808540344238, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -4.717167854309082, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -5.329412460327148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1185917854309082, + "rewards_train/margins": 0.1034119576215744, + "rewards_train/rejected": -0.2220037430524826, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -18.82575035095215, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -8.218692779541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0575750358402729, + "rewards_train/margins": 0.3236692361533642, + "rewards_train/rejected": -0.3812442719936371, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -10.55756950378418, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -12.250399589538574, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42138195037841797, + "rewards_train/margins": 0.3536580204963684, + "rewards_train/rejected": -0.7750399708747864, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -34.4549446105957, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -76.85330200195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8329944610595703, + "rewards_train/margins": 1.9023358821868896, + "rewards_train/rejected": -2.73533034324646, + "step": 471 + }, + { + "epoch": 0.13, + "logps_train/chosen": -13.657812118530273, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -27.269405364990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17828121781349182, + "rewards_train/margins": 0.3986593186855316, + "rewards_train/rejected": -0.5769405364990234, + "step": 471 + }, + { + "epoch": 0.13, + "learning_rate": 1.8964945702859363e-06, + "loss": 0.537, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -26.870830535888672, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -2.21875, + "logps_train/rejected": -6.8726582527160645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4870830476284027, + "rewards_train/margins": -0.021692216396331787, + "rewards_train/rejected": -0.4653908312320709, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -71.51596069335938, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -88.82318878173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5515961050987244, + "rewards_train/margins": 2.155722916126251, + "rewards_train/rejected": -2.7073190212249756, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -79.05147552490234, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -78.73848724365234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.4948524534702301, + "rewards_train/margins": -0.03129884600639343, + "rewards_train/rejected": 0.5261512994766235, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -42.857566833496094, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -107.69770812988281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11075668781995773, + "rewards_train/margins": -0.14098587445914745, + "rewards_train/rejected": 0.03022918663918972, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -25.359588623046875, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -8.649635314941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5109588503837585, + "rewards_train/margins": -0.3959953188896179, + "rewards_train/rejected": -0.11496353149414062, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -143.44752502441406, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -187.75039672851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.344752788543701, + "rewards_train/margins": 2.4302868843078613, + "rewards_train/rejected": -6.7750396728515625, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -8.588810920715332, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -6.887935638427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021381093189120293, + "rewards_train/margins": 0.13928747363388538, + "rewards_train/rejected": -0.16066856682300568, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -173.91677856445312, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -169.46287536621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4916778802871704, + "rewards_train/margins": 3.9046095609664917, + "rewards_train/rejected": -5.396287441253662, + "step": 472 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.605256080627441, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -6.256370544433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16990061104297638, + "rewards_train/margins": 0.15729893743991852, + "rewards_train/rejected": -0.3271995484828949, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -53.88624954223633, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -42.17250442504883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4113750457763672, + "rewards_train/margins": 1.253625512123108, + "rewards_train/rejected": -0.8422504663467407, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -103.06258392333984, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -157.31298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1062583923339844, + "rewards_train/margins": 2.7250404357910156, + "rewards_train/rejected": -5.831298828125, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -13.6814603805542, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -14.80078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20564603805541992, + "rewards_train/margins": 0.011932089924812317, + "rewards_train/rejected": -0.21757812798023224, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -28.897165298461914, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -12.081789016723633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4272165298461914, + "rewards_train/margins": 0.08408737182617188, + "rewards_train/rejected": -0.5113039016723633, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -17.577102661132812, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -8.050416946411133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5264602899551392, + "rewards_train/margins": -0.03548109531402588, + "rewards_train/rejected": -0.4909791946411133, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -158.78611755371094, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -148.4861297607422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.528611898422241, + "rewards_train/margins": -0.279998779296875, + "rewards_train/rejected": -3.248613119125366, + "step": 473 + }, + { + "epoch": 0.13, + "logps_train/chosen": -118.10470581054688, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -162.19129943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.460470587015152, + "rewards_train/margins": 0.5586594045162201, + "rewards_train/rejected": -1.019129991531372, + "step": 473 + }, + { + "epoch": 0.13, + "learning_rate": 1.8953193102245085e-06, + "loss": 0.5123, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -151.1201171875, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -249.3292236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.212011694908142, + "rewards_train/margins": 7.8209110498428345, + "rewards_train/rejected": -9.032922744750977, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -83.49779510498047, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -171.4378662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6497794985771179, + "rewards_train/margins": 1.5940070748329163, + "rewards_train/rejected": -2.243786573410034, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -19.86001205444336, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -36.095985412597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15149879455566406, + "rewards_train/margins": 0.9485973715782166, + "rewards_train/rejected": -0.7970985770225525, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -26.213424682617188, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -20.58072853088379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44634246826171875, + "rewards_train/margins": 0.524230420589447, + "rewards_train/rejected": -0.9705728888511658, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -197.34799194335938, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -215.81118774414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.284799575805664, + "rewards_train/margins": 1.1463193893432617, + "rewards_train/rejected": -9.431118965148926, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -131.60797119140625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -131.89671325683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5107972621917725, + "rewards_train/margins": 0.6288740634918213, + "rewards_train/rejected": -4.139671325683594, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -56.50353240966797, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -17.434629440307617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6003532409667969, + "rewards_train/margins": -0.1756402850151062, + "rewards_train/rejected": -0.4247129559516907, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -48.177921295166016, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -7.525335311889648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10720787197351456, + "rewards_train/margins": 0.3753663972020149, + "rewards_train/rejected": -0.26815852522850037, + "step": 474 + }, + { + "epoch": 0.13, + "logps_train/chosen": -136.63272094726562, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -177.36175537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4632720947265625, + "rewards_train/margins": 1.3729033470153809, + "rewards_train/rejected": -4.836175441741943, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -45.45881652832031, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -53.58079528808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3291183412075043, + "rewards_train/margins": 0.03719785809516907, + "rewards_train/rejected": 0.2919204831123352, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -211.89329528808594, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -155.89083862304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.089329719543457, + "rewards_train/margins": -1.1502459049224854, + "rewards_train/rejected": -2.9390838146209717, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -134.4420166015625, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -188.06346130371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.544201612472534, + "rewards_train/margins": 4.162144422531128, + "rewards_train/rejected": -7.706346035003662, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -21.465999603271484, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -6.111142158508301, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0034000396262854338, + "rewards_train/margins": 0.14263926143758, + "rewards_train/rejected": -0.13923922181129456, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -68.486572265625, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -34.587730407714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8986572623252869, + "rewards_train/margins": 0.060115814208984375, + "rewards_train/rejected": -0.9587730765342712, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -15.364826202392578, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -108.90599060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28648263216018677, + "rewards_train/margins": 1.0041164755821228, + "rewards_train/rejected": -1.2905991077423096, + "step": 475 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.8047968745231628, + "logps_train/ref_chosen": -0.7265625, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -9.156216621398926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007823437452316284, + "rewards_train/margins": 0.37029823660850525, + "rewards_train/rejected": -0.37812167406082153, + "step": 475 + }, + { + "epoch": 0.13, + "learning_rate": 1.8941377838771633e-06, + "loss": 0.466, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -19.36043357849121, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -24.44990348815918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22354336082935333, + "rewards_train/margins": 0.3214469999074936, + "rewards_train/rejected": -0.5449903607368469, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -9.971562385559082, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -24.962085723876953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2252812385559082, + "rewards_train/margins": -0.004072666168212891, + "rewards_train/rejected": -0.2212085723876953, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -124.83406066894531, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -168.92897033691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3334062099456787, + "rewards_train/margins": -0.240509033203125, + "rewards_train/rejected": -2.0928971767425537, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -136.693359375, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -178.18798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6193360090255737, + "rewards_train/margins": 3.849463105201721, + "rewards_train/rejected": -5.468799114227295, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -19.96912384033203, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -6.753169536590576, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3344123959541321, + "rewards_train/margins": -0.08097043633460999, + "rewards_train/rejected": -0.2534419596195221, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -175.15647888183594, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -211.774658203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.465648174285889, + "rewards_train/margins": -0.08818244934082031, + "rewards_train/rejected": -7.377465724945068, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -83.14643859863281, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -155.64730834960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6646438837051392, + "rewards_train/margins": 5.300086855888367, + "rewards_train/rejected": -5.964730739593506, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.8767567873001099, + "logps_train/ref_chosen": -1.078125, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -12.572433471679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07986318320035934, + "rewards_train/margins": -0.028869833797216415, + "rewards_train/rejected": -0.05099334940314293, + "step": 476 + }, + { + "epoch": 0.13, + "logps_train/chosen": -5.1182684898376465, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -5.4664201736450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07432685047388077, + "rewards_train/margins": 0.20356517285108566, + "rewards_train/rejected": -0.27789202332496643, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -173.84942626953125, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -156.34805297851562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.334942817687988, + "rewards_train/margins": -0.6001372337341309, + "rewards_train/rejected": -5.734805583953857, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.342371940612793, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -21.3763427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34829971194267273, + "rewards_train/margins": 0.43933460116386414, + "rewards_train/rejected": -0.7876343131065369, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -12.423095703125, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -17.926708221435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04855957254767418, + "rewards_train/margins": 0.4253612495958805, + "rewards_train/rejected": -0.4739208221435547, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -143.350341796875, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -243.0814208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.135034084320068, + "rewards_train/margins": 7.9231085777282715, + "rewards_train/rejected": -12.05814266204834, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -176.95263671875, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -219.53123474121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2952637672424316, + "rewards_train/margins": 4.457859992980957, + "rewards_train/rejected": -7.753123760223389, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -69.15496826171875, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -79.60371398925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6904968619346619, + "rewards_train/margins": 1.6698744893074036, + "rewards_train/rejected": -2.3603713512420654, + "step": 477 + }, + { + "epoch": 0.13, + "logps_train/chosen": -27.731842041015625, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -43.988487243652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.285684198141098, + "rewards_train/margins": -0.06183546781539917, + "rewards_train/rejected": -0.22384873032569885, + "step": 477 + }, + { + "epoch": 0.13, + "learning_rate": 1.8929499995133326e-06, + "loss": 0.4882, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -2.1750426292419434, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -10.163656234741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016870737075805664, + "rewards_train/margins": 0.14573636651039124, + "rewards_train/rejected": -0.12886562943458557, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -122.96965789794922, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -222.7532501220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.396965742111206, + "rewards_train/margins": 4.778359651565552, + "rewards_train/rejected": -8.175325393676758, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.564626693725586, + "logps_train/ref_chosen": -0.73046875, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -5.780163288116455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08341579884290695, + "rewards_train/margins": 0.013350531458854675, + "rewards_train/rejected": -0.09676633030176163, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -9.313492774963379, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -23.24124526977539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3282242715358734, + "rewards_train/margins": 0.3209002912044525, + "rewards_train/rejected": -0.6491245627403259, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -10.9270601272583, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -25.877347946166992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12604399025440216, + "rewards_train/margins": 0.20127878338098526, + "rewards_train/rejected": -0.0752347931265831, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -12.827489852905273, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -17.223569869995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11100101470947266, + "rewards_train/margins": 0.9396080374717712, + "rewards_train/rejected": -0.8286070227622986, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -123.59957885742188, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -157.64651489257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6599578857421875, + "rewards_train/margins": 2.8046936988830566, + "rewards_train/rejected": -5.464651584625244, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -96.83312225341797, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -147.47457885742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1333122253417969, + "rewards_train/margins": 4.964145660400391, + "rewards_train/rejected": -6.0974578857421875, + "step": 478 + }, + { + "epoch": 0.13, + "logps_train/chosen": -143.86105346679688, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -164.67486572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8361053466796875, + "rewards_train/margins": 0.531381368637085, + "rewards_train/rejected": -2.3674867153167725, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -153.92437744140625, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -221.49331665039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9424378871917725, + "rewards_train/margins": 2.4068939685821533, + "rewards_train/rejected": -6.349331855773926, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.709747314453125, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -1.65625, + "logps_train/rejected": -6.7818098068237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.372537225484848, + "rewards_train/margins": 0.1400187909603119, + "rewards_train/rejected": -0.5125560164451599, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.5111333131790161, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -1.7734375, + "logps_train/rejected": -3.3753511905670166, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015292919240891933, + "rewards_train/margins": 0.17548429127782583, + "rewards_train/rejected": -0.1601913720369339, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -20.115882873535156, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -6.531342506408691, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2990882992744446, + "rewards_train/margins": -0.1584540456533432, + "rewards_train/rejected": -0.14063425362110138, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -0.2076537162065506, + "logps_train/ref_chosen": -0.333984375, + "logps_train/ref_rejected": -0.333984375, + "logps_train/rejected": -0.20346316695213318, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.012633065693080425, + "rewards_train/margins": -0.00041905511170625687, + "rewards_train/rejected": 0.013052120804786682, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -12.111091613769531, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -17.144290924072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007640838623046875, + "rewards_train/margins": 0.2533199340105057, + "rewards_train/rejected": -0.2456790953874588, + "step": 479 + }, + { + "epoch": 0.13, + "logps_train/chosen": -20.69788360595703, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -11.859262466430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2197883576154709, + "rewards_train/margins": 0.14738790690898895, + "rewards_train/rejected": -0.36717626452445984, + "step": 479 + }, + { + "epoch": 0.13, + "learning_rate": 1.891755965446247e-06, + "loss": 0.4567, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -86.21295166015625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -159.71206665039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12870483100414276, + "rewards_train/margins": 4.7999117821455, + "rewards_train/rejected": -4.671206951141357, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -18.34885597229004, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -7.5624284744262695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25363561511039734, + "rewards_train/margins": -0.10989277064800262, + "rewards_train/rejected": -0.14374284446239471, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -115.14189147949219, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -124.9464111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3141891658306122, + "rewards_train/margins": 0.8804519474506378, + "rewards_train/rejected": -1.19464111328125, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -164.010498046875, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -231.5311737060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.801049709320068, + "rewards_train/margins": 3.1520676612854004, + "rewards_train/rejected": -7.953117370605469, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.984516620635986, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -1.140625, + "logps_train/rejected": -5.4679179191589355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13907666504383087, + "rewards_train/margins": 0.29365263879299164, + "rewards_train/rejected": -0.4327293038368225, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -1.1402339935302734, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -10.618021965026855, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016367150470614433, + "rewards_train/margins": 0.3860600460320711, + "rewards_train/rejected": -0.40242719650268555, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -11.44169807434082, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -8.868433952331543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05583019182085991, + "rewards_train/margins": 0.5786110870540142, + "rewards_train/rejected": -0.5227808952331543, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -128.11383056640625, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -86.83199310302734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3613831996917725, + "rewards_train/margins": -0.37818384170532227, + "rewards_train/rejected": -1.9831993579864502, + "step": 480 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.683014869689941, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -7.955038547515869, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25267648696899414, + "rewards_train/margins": -0.257172632496804, + "rewards_train/rejected": 0.004496145527809858, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -7.280458927154541, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -8.37936782836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03429589420557022, + "rewards_train/margins": 0.19739089161157608, + "rewards_train/rejected": -0.2316867858171463, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -3.7903780937194824, + "logps_train/ref_chosen": -1.6796875, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -4.803529739379883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21106906235218048, + "rewards_train/margins": -0.41196608543395996, + "rewards_train/rejected": 0.20089702308177948, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -24.656600952148438, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -14.598702430725098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10933990776538849, + "rewards_train/margins": 0.4004601687192917, + "rewards_train/rejected": -0.2911202609539032, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -26.29900360107422, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -13.42369270324707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1799003630876541, + "rewards_train/margins": 0.3249689191579819, + "rewards_train/rejected": -0.504869282245636, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -40.12770462036133, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -74.45620727539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3627704679965973, + "rewards_train/margins": 2.3578504025936127, + "rewards_train/rejected": -2.72062087059021, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -75.65925598144531, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -115.30020141601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21592560410499573, + "rewards_train/margins": 1.7640945613384247, + "rewards_train/rejected": -1.9800201654434204, + "step": 481 + }, + { + "epoch": 0.13, + "logps_train/chosen": -112.89776611328125, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -136.9538116455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5897767543792725, + "rewards_train/margins": 1.655604600906372, + "rewards_train/rejected": -4.2453813552856445, + "step": 481 + }, + { + "epoch": 0.13, + "learning_rate": 1.8905556900328799e-06, + "loss": 0.4623, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -87.52178955078125, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -163.64366149902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0021789551246911287, + "rewards_train/margins": 4.412187290145084, + "rewards_train/rejected": -4.414366245269775, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -8.498355865478516, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -10.408529281616211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4060855805873871, + "rewards_train/margins": -0.1308576464653015, + "rewards_train/rejected": -0.27522793412208557, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -6.911815643310547, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -11.194073677062988, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1411815732717514, + "rewards_train/margins": 0.015725791454315186, + "rewards_train/rejected": -0.1569073647260666, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -10.99846076965332, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -27.855615615844727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30297109484672546, + "rewards_train/margins": 1.120090514421463, + "rewards_train/rejected": -1.4230616092681885, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -4.276616096496582, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -0.8984375, + "logps_train/rejected": -0.6613546013832092, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12922410666942596, + "rewards_train/margins": -0.152932396158576, + "rewards_train/rejected": 0.023708289489150047, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -4.15359354019165, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -12.793200492858887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002859354019165039, + "rewards_train/margins": 0.4702107012271881, + "rewards_train/rejected": -0.47307005524635315, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -150.19126892089844, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -182.79620361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08087310940027237, + "rewards_train/margins": 4.960493661463261, + "rewards_train/rejected": -4.879620552062988, + "step": 482 + }, + { + "epoch": 0.13, + "logps_train/chosen": -132.1036376953125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -231.2814483642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.31036376953125, + "rewards_train/margins": 5.617781162261963, + "rewards_train/rejected": -7.928144931793213, + "step": 482 + }, + { + "epoch": 0.14, + "logps_train/chosen": -208.66653442382812, + "logps_train/ref_chosen": -232.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -117.14822387695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 2.3333466053009033, + "rewards_train/margins": 3.7981690168380737, + "rewards_train/rejected": -1.4648224115371704, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -120.10673522949219, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -196.84579467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.510673522949219, + "rewards_train/margins": 4.723906517028809, + "rewards_train/rejected": -9.234580039978027, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -54.35701370239258, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -12.371855735778809, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13570137321949005, + "rewards_train/margins": 0.5171092003583908, + "rewards_train/rejected": -0.6528105735778809, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -78.25459289550781, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -206.48062133789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6254593133926392, + "rewards_train/margins": 4.522602915763855, + "rewards_train/rejected": -5.148062229156494, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -22.34829330444336, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -123.0987777709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8910793662071228, + "rewards_train/margins": 2.118798553943634, + "rewards_train/rejected": -3.009877920150757, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -44.66845703125, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -20.76532745361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3668456971645355, + "rewards_train/margins": 0.3471870720386505, + "rewards_train/rejected": -0.714032769203186, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -29.297685623168945, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -16.29969596862793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8922685980796814, + "rewards_train/margins": 0.15020102262496948, + "rewards_train/rejected": -1.0424696207046509, + "step": 483 + }, + { + "epoch": 0.14, + "logps_train/chosen": -138.88082885742188, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -161.2452850341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7880828976631165, + "rewards_train/margins": 2.936445653438568, + "rewards_train/rejected": -3.7245285511016846, + "step": 483 + }, + { + "epoch": 0.14, + "learning_rate": 1.8893491816738857e-06, + "loss": 0.3021, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -188.74951171875, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -193.16806030273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9749512076377869, + "rewards_train/margins": 0.8418548703193665, + "rewards_train/rejected": -1.8168060779571533, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -147.52117919921875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -200.84373474121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.852117896080017, + "rewards_train/margins": 3.7322555780410767, + "rewards_train/rejected": -5.584373474121094, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -76.4507827758789, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -154.244384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6049217581748962, + "rewards_train/margins": 2.4293602108955383, + "rewards_train/rejected": -1.824438452720642, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -153.73895263671875, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -162.64056396484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8738954067230225, + "rewards_train/margins": 0.24016118049621582, + "rewards_train/rejected": -4.114056587219238, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -108.72958374023438, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -203.51565551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9729583263397217, + "rewards_train/margins": 6.028607606887817, + "rewards_train/rejected": -9.001565933227539, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -30.16315460205078, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -30.290775299072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9413154721260071, + "rewards_train/margins": 0.012762069702148438, + "rewards_train/rejected": -0.9540775418281555, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -26.763256072998047, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -32.10690689086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7013255953788757, + "rewards_train/margins": 0.3968650698661804, + "rewards_train/rejected": -1.0981906652450562, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -90.59725952148438, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -103.968505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34027406573295593, + "rewards_train/margins": 1.887124627828598, + "rewards_train/rejected": -1.546850562095642, + "step": 484 + }, + { + "epoch": 0.14, + "logps_train/chosen": -7.671775817871094, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -11.655774116516113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22342757880687714, + "rewards_train/margins": 0.47027482092380524, + "rewards_train/rejected": -0.6937023997306824, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -10.587728500366211, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -16.85869026184082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05877285078167915, + "rewards_train/margins": 0.2708461694419384, + "rewards_train/rejected": -0.32961902022361755, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -72.17090606689453, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -86.12704467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.817090630531311, + "rewards_train/margins": 0.09561383724212646, + "rewards_train/rejected": -0.9127044677734375, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -41.52665328979492, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -39.81778335571289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25266534090042114, + "rewards_train/margins": 0.5541130304336548, + "rewards_train/rejected": -0.8067783713340759, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -186.05752563476562, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -223.35964965820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.305752754211426, + "rewards_train/margins": -0.06978750228881836, + "rewards_train/rejected": -5.235965251922607, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -113.36885833740234, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -179.79681396484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6368858814239502, + "rewards_train/margins": 5.192795515060425, + "rewards_train/rejected": -6.829681396484375, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -152.23977661132812, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -133.93161010742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5239776372909546, + "rewards_train/margins": 2.5691834688186646, + "rewards_train/rejected": -4.093161106109619, + "step": 485 + }, + { + "epoch": 0.14, + "logps_train/chosen": -134.47976684570312, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -104.91578674316406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0979766845703125, + "rewards_train/margins": -0.8063979148864746, + "rewards_train/rejected": -2.291578769683838, + "step": 485 + }, + { + "epoch": 0.14, + "learning_rate": 1.8881364488135445e-06, + "loss": 0.408, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -55.92763137817383, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -153.0121612548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1677631437778473, + "rewards_train/margins": 4.783453077077866, + "rewards_train/rejected": -4.951216220855713, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -17.524078369140625, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -26.919235229492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.333657830953598, + "rewards_train/margins": 0.05826568603515625, + "rewards_train/rejected": -0.3919235169887543, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -19.384857177734375, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -40.348655700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2240142822265625, + "rewards_train/margins": 1.1838798522949219, + "rewards_train/rejected": -0.9598655700683594, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.946388244628906, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -7.711490154266357, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0053611756302416325, + "rewards_train/margins": 0.029635191429406404, + "rewards_train/rejected": -0.024274015799164772, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.1092615127563477, + "logps_train/ref_chosen": -2.1875, + "logps_train/ref_rejected": -2.421875, + "logps_train/rejected": -3.1994502544403076, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007823849096894264, + "rewards_train/margins": 0.08558137901127338, + "rewards_train/rejected": -0.07775752991437912, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -77.9073257446289, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -127.07996368408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6657326221466064, + "rewards_train/margins": 1.6922638416290283, + "rewards_train/rejected": -4.357996463775635, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -47.03771209716797, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -83.94306945800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4462288022041321, + "rewards_train/margins": 0.9405357539653778, + "rewards_train/rejected": -0.4943069517612457, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -1.9325733184814453, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -18.231081008911133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07236766815185547, + "rewards_train/margins": 0.3892257809638977, + "rewards_train/rejected": -0.31685811281204224, + "step": 486 + }, + { + "epoch": 0.14, + "logps_train/chosen": -21.25543212890625, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -13.308511734008789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4255432188510895, + "rewards_train/margins": -0.0759420394897461, + "rewards_train/rejected": -0.3496011793613434, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -101.28964233398438, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -180.97439575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2789642810821533, + "rewards_train/margins": 3.3184754848480225, + "rewards_train/rejected": -4.597439765930176, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -110.3177719116211, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -171.4263916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6317771673202515, + "rewards_train/margins": 3.66086208820343, + "rewards_train/rejected": -5.292639255523682, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -102.55398559570312, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -143.86895751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.444601446390152, + "rewards_train/margins": 1.3314972221851349, + "rewards_train/rejected": -0.8868957757949829, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -3.98334002494812, + "logps_train/ref_chosen": -1.59375, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -10.349466323852539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23895899951457977, + "rewards_train/margins": -0.1915123648941517, + "rewards_train/rejected": -0.047446634620428085, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -26.60845947265625, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -22.131088256835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18584595620632172, + "rewards_train/margins": 0.3647628575563431, + "rewards_train/rejected": -0.5506088137626648, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -6.465714931488037, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -27.094087600708008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16592851281166077, + "rewards_train/margins": 1.4003372490406036, + "rewards_train/rejected": -1.2344087362289429, + "step": 487 + }, + { + "epoch": 0.14, + "logps_train/chosen": -26.99505615234375, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -26.646020889282227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.274505615234375, + "rewards_train/margins": -0.04740352928638458, + "rewards_train/rejected": -0.22710208594799042, + "step": 487 + }, + { + "epoch": 0.14, + "learning_rate": 1.8869174999397e-06, + "loss": 0.4105, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.871271133422852, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -3.608128547668457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12618961930274963, + "rewards_train/margins": -0.01850176602602005, + "rewards_train/rejected": -0.10768785327672958, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -142.92514038085938, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -155.62619018554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7925140261650085, + "rewards_train/margins": 1.470104992389679, + "rewards_train/rejected": -2.2626190185546875, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -3.6233887672424316, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -12.35933780670166, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036098625510931015, + "rewards_train/margins": 0.584532406181097, + "rewards_train/rejected": -0.548433780670166, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -135.99928283691406, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -247.40838623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4499282836914062, + "rewards_train/margins": 5.590910911560059, + "rewards_train/rejected": -9.040839195251465, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -41.64033889770508, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -58.460166931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8140339255332947, + "rewards_train/margins": 1.4069827198982239, + "rewards_train/rejected": -2.2210166454315186, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.828211784362793, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -9.53808307647705, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010928821749985218, + "rewards_train/margins": 0.4553621234372258, + "rewards_train/rejected": -0.4444333016872406, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -187.48904418945312, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -182.72003173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.348904609680176, + "rewards_train/margins": 0.2230987548828125, + "rewards_train/rejected": -5.572003364562988, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -47.21136474609375, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -37.37474822998047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2961364686489105, + "rewards_train/margins": -0.2586616463959217, + "rewards_train/rejected": -0.037474822252988815, + "step": 488 + }, + { + "epoch": 0.14, + "logps_train/chosen": -153.8907928466797, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -183.40646362304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1890792846679688, + "rewards_train/margins": -0.34843289852142334, + "rewards_train/rejected": -1.8406463861465454, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -69.57864379882812, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -125.6148910522461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5078644156455994, + "rewards_train/margins": 3.6036248803138733, + "rewards_train/rejected": -4.111489295959473, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -107.2952651977539, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -185.62551879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.029526472091675, + "rewards_train/margins": 1.4330253601074219, + "rewards_train/rejected": -3.4625518321990967, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -1.4337623119354248, + "logps_train/ref_chosen": -1.46875, + "logps_train/ref_rejected": -1.46875, + "logps_train/rejected": -1.458742618560791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003498768899589777, + "rewards_train/margins": 0.002498030778951943, + "rewards_train/rejected": 0.001000738120637834, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -25.651885986328125, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -12.481595039367676, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5901886224746704, + "rewards_train/margins": -0.22952911257743835, + "rewards_train/rejected": -0.36065950989723206, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -14.503486633300781, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -34.68169403076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24965134263038635, + "rewards_train/margins": 1.3053207695484161, + "rewards_train/rejected": -1.0556694269180298, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.728527307510376, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -17.490957260131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1041027307510376, + "rewards_train/margins": 0.06999300420284271, + "rewards_train/rejected": -0.1740957349538803, + "step": 489 + }, + { + "epoch": 0.14, + "logps_train/chosen": -169.89894104003906, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -193.15640258789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3898942470550537, + "rewards_train/margins": 2.425746202468872, + "rewards_train/rejected": -4.815640449523926, + "step": 489 + }, + { + "epoch": 0.14, + "learning_rate": 1.885692343583702e-06, + "loss": 0.4436, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -162.5735321044922, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -301.53070068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1573532819747925, + "rewards_train/margins": 9.895716786384583, + "rewards_train/rejected": -11.053070068359375, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -83.88558197021484, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -210.5684356689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4885581731796265, + "rewards_train/margins": 6.918285965919495, + "rewards_train/rejected": -8.406844139099121, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -53.46153259277344, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -96.1488037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12115325778722763, + "rewards_train/margins": 1.1937271133065224, + "rewards_train/rejected": -1.31488037109375, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -82.87626647949219, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -153.79034423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06237335130572319, + "rewards_train/margins": 1.141407798975706, + "rewards_train/rejected": -1.079034447669983, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -181.49148559570312, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -134.76602172851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8491485118865967, + "rewards_train/margins": 0.32745361328125, + "rewards_train/rejected": -3.1766021251678467, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -177.72958374023438, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -188.3984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22704163193702698, + "rewards_train/margins": 3.066885381937027, + "rewards_train/rejected": -2.83984375, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -1.547162652015686, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -5.354833602905273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.059346236288547516, + "rewards_train/margins": 0.3198295906186104, + "rewards_train/rejected": -0.26048335433006287, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -36.67271423339844, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -47.6015510559082, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.15772858262062073, + "rewards_train/margins": -0.0571163147687912, + "rewards_train/rejected": 0.21484489738941193, + "step": 490 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.626617431640625, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -23.874582290649414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.390786737203598, + "rewards_train/margins": -0.16582851111888885, + "rewards_train/rejected": -0.22495822608470917, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -131.57855224609375, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -124.40827941894531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.407855272293091, + "rewards_train/margins": -0.6670272350311279, + "rewards_train/rejected": -2.740828037261963, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.2262468338012695, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -15.93916130065918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06637468189001083, + "rewards_train/margins": -0.028708551079034805, + "rewards_train/rejected": -0.03766613081097603, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -55.27442169189453, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -80.63192749023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.052442170679569244, + "rewards_train/margins": 0.36075057834386826, + "rewards_train/rejected": -0.4131927490234375, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -48.45997619628906, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -22.2700138092041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5209976434707642, + "rewards_train/margins": 0.21225374937057495, + "rewards_train/rejected": -0.7332513928413391, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -65.94795989990234, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -65.22708129882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.055204011499881744, + "rewards_train/margins": -0.072087861597538, + "rewards_train/rejected": 0.12729187309741974, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -65.6181640625, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -28.969255447387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18681640923023224, + "rewards_train/margins": 0.9851091355085373, + "rewards_train/rejected": -1.1719255447387695, + "step": 491 + }, + { + "epoch": 0.14, + "logps_train/chosen": -40.35429763793945, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -59.468048095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2604297697544098, + "rewards_train/margins": 1.8363750874996185, + "rewards_train/rejected": -2.0968048572540283, + "step": 491 + }, + { + "epoch": 0.14, + "learning_rate": 1.8844609883203464e-06, + "loss": 0.4553, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -22.29351806640625, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -43.39124298095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21685181558132172, + "rewards_train/margins": 1.4722725301980972, + "rewards_train/rejected": -1.689124345779419, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.3391523361206055, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -7.362914085388184, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012040234170854092, + "rewards_train/margins": 0.03987617511302233, + "rewards_train/rejected": -0.05191640928387642, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -153.56393432617188, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -258.7713317871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.456393718719482, + "rewards_train/margins": 7.920739650726318, + "rewards_train/rejected": -12.3771333694458, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -58.05421829223633, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -57.51496887207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9304218292236328, + "rewards_train/margins": 0.29607510566711426, + "rewards_train/rejected": -1.226496934890747, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -50.46955490112305, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -67.50880432128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0030445100273936987, + "rewards_train/margins": -0.2960750518832356, + "rewards_train/rejected": 0.2991195619106293, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -45.971717834472656, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -18.74457359313965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9971718192100525, + "rewards_train/margins": -0.2352144718170166, + "rewards_train/rejected": -0.7619573473930359, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -177.64161682128906, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -207.8814697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.064161777496338, + "rewards_train/margins": 0.9239852428436279, + "rewards_train/rejected": -3.988147020339966, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -313.8852233886719, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -274.45843505859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.288522720336914, + "rewards_train/margins": -2.0426788330078125, + "rewards_train/rejected": -10.245843887329102, + "step": 492 + }, + { + "epoch": 0.14, + "logps_train/chosen": -101.84625244140625, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -120.1379623413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.234625220298767, + "rewards_train/margins": 2.279171109199524, + "rewards_train/rejected": -3.513796329498291, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -55.977298736572266, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -82.5510025024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0227298736572266, + "rewards_train/margins": 1.7073705196380615, + "rewards_train/rejected": -2.730100393295288, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.479859828948975, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -6.014388084411621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03861098363995552, + "rewards_train/margins": 0.2315778248012066, + "rewards_train/rejected": -0.2701888084411621, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.62693977355957, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -18.02927589416504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06543102115392685, + "rewards_train/margins": 0.8058586344122887, + "rewards_train/rejected": -0.7404276132583618, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -127.95182037353516, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -135.8455810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.345182180404663, + "rewards_train/margins": 1.2393758296966553, + "rewards_train/rejected": -4.584558010101318, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -24.999799728393555, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -9.746013641357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06252002716064453, + "rewards_train/margins": 0.04337139055132866, + "rewards_train/rejected": 0.019148636609315872, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -113.49085235595703, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -207.98281860351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.84908527135849, + "rewards_train/margins": 4.7491965889930725, + "rewards_train/rejected": -5.5982818603515625, + "step": 493 + }, + { + "epoch": 0.14, + "logps_train/chosen": -11.307425498962402, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -5.858437538146973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1557425558567047, + "rewards_train/margins": 0.014476194977760315, + "rewards_train/rejected": -0.17021875083446503, + "step": 493 + }, + { + "epoch": 0.14, + "learning_rate": 1.883223442767814e-06, + "loss": 0.5277, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -195.63156127929688, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -147.1940460205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7631561756134033, + "rewards_train/margins": -0.5937514305114746, + "rewards_train/rejected": -3.1694047451019287, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -196.99656677246094, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -278.2801513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.399656772613525, + "rewards_train/margins": 2.928358554840088, + "rewards_train/rejected": -9.328015327453613, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -140.75991821289062, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -191.7120819091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5759918689727783, + "rewards_train/margins": 3.195216417312622, + "rewards_train/rejected": -5.7712082862854, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -120.15840911865234, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -184.4115753173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.215841054916382, + "rewards_train/margins": 4.875316381454468, + "rewards_train/rejected": -7.09115743637085, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -18.84891700744629, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -71.45575714111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28489169478416443, + "rewards_train/margins": 3.0356840193271637, + "rewards_train/rejected": -3.320575714111328, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -219.8612823486328, + "logps_train/ref_chosen": -203.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -185.4606475830078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6861282587051392, + "rewards_train/margins": -1.140063464641571, + "rewards_train/rejected": -0.5460647940635681, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -27.661060333251953, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -13.821569442749023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10360603779554367, + "rewards_train/margins": 0.5660508945584297, + "rewards_train/rejected": -0.6696569323539734, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -18.59809112548828, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -17.637737274169922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.872309148311615, + "rewards_train/margins": -0.1897854208946228, + "rewards_train/rejected": -0.6825237274169922, + "step": 494 + }, + { + "epoch": 0.14, + "logps_train/chosen": -55.44672775268555, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -41.468284606933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6196727752685547, + "rewards_train/margins": -0.022844314575195312, + "rewards_train/rejected": -0.5968284606933594, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -120.6417236328125, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -166.97927856445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.364172339439392, + "rewards_train/margins": 3.1337558031082153, + "rewards_train/rejected": -4.497928142547607, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -129.60842895507812, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -130.30487060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8608428835868835, + "rewards_train/margins": 0.06964421272277832, + "rewards_train/rejected": -0.9304870963096619, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -88.73383331298828, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -146.28912353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.423383355140686, + "rewards_train/margins": 1.6055289506912231, + "rewards_train/rejected": -3.028912305831909, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -95.13920593261719, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -143.54600524902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18607941269874573, + "rewards_train/margins": 1.9906800091266632, + "rewards_train/rejected": -1.8046005964279175, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -41.674739837646484, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -59.444339752197266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.46747398376464844, + "rewards_train/margins": -0.3230400085449219, + "rewards_train/rejected": -0.14443397521972656, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -28.651105880737305, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -1.703125, + "logps_train/rejected": -10.954182624816895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5901105999946594, + "rewards_train/margins": 0.3349951505661011, + "rewards_train/rejected": -0.9251057505607605, + "step": 495 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.041572570800781, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -10.723628997802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06665726006031036, + "rewards_train/margins": 0.11195564270019531, + "rewards_train/rejected": -0.17861290276050568, + "step": 495 + }, + { + "epoch": 0.14, + "learning_rate": 1.8819797155876121e-06, + "loss": 0.4753, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -70.65135192871094, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -80.26017761230469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7901352047920227, + "rewards_train/margins": -0.764117443934083, + "rewards_train/rejected": -0.02601776085793972, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.638110160827637, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -8.101581573486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3356860280036926, + "rewards_train/margins": 0.024472147226333618, + "rewards_train/rejected": -0.36015817523002625, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -117.67063903808594, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -151.74630737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2170639038085938, + "rewards_train/margins": 2.5075669288635254, + "rewards_train/rejected": -4.724630832672119, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -69.29942321777344, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -93.93626403808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6200577020645142, + "rewards_train/margins": 0.663684107363224, + "rewards_train/rejected": -0.04362640529870987, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -14.133280754089355, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -7.928774833679199, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5445780754089355, + "rewards_train/margins": -0.42357558757066727, + "rewards_train/rejected": -0.12100248783826828, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -10.306077003479004, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -8.94614028930664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1306077092885971, + "rewards_train/margins": -0.020368680357933044, + "rewards_train/rejected": -0.11023902893066406, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -27.635438919067383, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -60.671173095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2885439097881317, + "rewards_train/margins": 0.2785734236240387, + "rewards_train/rejected": -0.5671173334121704, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -66.89253234863281, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -66.59334564208984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5642532110214233, + "rewards_train/margins": -0.029918670654296875, + "rewards_train/rejected": -1.5343345403671265, + "step": 496 + }, + { + "epoch": 0.14, + "logps_train/chosen": -36.25645065307617, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -16.579273223876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5743549466133118, + "rewards_train/margins": 1.269782304763794, + "rewards_train/rejected": -0.6954273581504822, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -1.7817009687423706, + "logps_train/ref_chosen": -0.369140625, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -10.369832992553711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14125603437423706, + "rewards_train/margins": 0.22697725892066956, + "rewards_train/rejected": -0.3682332932949066, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.054640293121338, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -5.634933948516846, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09921403229236603, + "rewards_train/margins": -0.07009563781321049, + "rewards_train/rejected": -0.02911839447915554, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -188.00900268554688, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -192.8502960205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.600900411605835, + "rewards_train/margins": -0.9158706665039062, + "rewards_train/rejected": -2.6850297451019287, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -15.60623836517334, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -23.573299407958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.423123836517334, + "rewards_train/margins": 0.7154561281204224, + "rewards_train/rejected": -1.1385799646377563, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -46.879241943359375, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -12.90671443939209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5379242300987244, + "rewards_train/margins": 0.3121222257614136, + "rewards_train/rejected": -0.8500464558601379, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -14.092641830444336, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -19.687541961669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29676419496536255, + "rewards_train/margins": 0.1344900131225586, + "rewards_train/rejected": -0.43125420808792114, + "step": 497 + }, + { + "epoch": 0.14, + "logps_train/chosen": -161.34228515625, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -190.2954864501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.384228706359863, + "rewards_train/margins": 0.14531993865966797, + "rewards_train/rejected": -5.529548645019531, + "step": 497 + }, + { + "epoch": 0.14, + "learning_rate": 1.8807298154845118e-06, + "loss": 0.6399, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -75.43817901611328, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -159.72222900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0438178777694702, + "rewards_train/margins": 3.078405022621155, + "rewards_train/rejected": -4.122222900390625, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -148.9552001953125, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -171.63400268554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.295520067214966, + "rewards_train/margins": 0.06788015365600586, + "rewards_train/rejected": -2.3634002208709717, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -77.63349151611328, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -203.21884155273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36334916949272156, + "rewards_train/margins": 6.858535081148148, + "rewards_train/rejected": -7.221884250640869, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -191.9423828125, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -189.58535766601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.09423828125, + "rewards_train/margins": 2.664297580718994, + "rewards_train/rejected": -5.758535861968994, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -126.77303314208984, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -148.56106567382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.327303409576416, + "rewards_train/margins": 4.178803443908691, + "rewards_train/rejected": -6.506106853485107, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -229.62567138671875, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -142.96449279785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.36256742477417, + "rewards_train/margins": -0.16611814498901367, + "rewards_train/rejected": -4.196449279785156, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.5660440921783447, + "logps_train/ref_chosen": -1.515625, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.969318389892578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10504191368818283, + "rewards_train/margins": -0.06123507395386696, + "rewards_train/rejected": -0.04380683973431587, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -53.493797302246094, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -174.38827514648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22562026977539062, + "rewards_train/margins": 3.364447832107544, + "rewards_train/rejected": -3.1388275623321533, + "step": 498 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.2106475830078125, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -13.693115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12893524765968323, + "rewards_train/margins": 0.5294967889785767, + "rewards_train/rejected": -0.40056154131889343, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -23.436199188232422, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -29.899572372436523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6186199188232422, + "rewards_train/margins": 0.2338373064994812, + "rewards_train/rejected": -0.8524572253227234, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -22.865936279296875, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -31.247623443603516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6490936279296875, + "rewards_train/margins": -0.07433128356933594, + "rewards_train/rejected": -0.5747623443603516, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -16.63283920288086, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -7.812922477722168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2617160975933075, + "rewards_train/margins": 0.5805083513259888, + "rewards_train/rejected": -0.3187922537326813, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -40.31793212890625, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -32.681331634521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.606793224811554, + "rewards_train/margins": 0.18633997440338135, + "rewards_train/rejected": -0.7931331992149353, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -69.53169250488281, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -63.52295684814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4531692564487457, + "rewards_train/margins": 2.499126523733139, + "rewards_train/rejected": -2.9522957801818848, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -28.72779083251953, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -12.092381477355957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3602790832519531, + "rewards_train/margins": -0.07604092359542847, + "rewards_train/rejected": -0.28423815965652466, + "step": 499 + }, + { + "epoch": 0.14, + "logps_train/chosen": -93.06520080566406, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -98.30303955078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.281520128250122, + "rewards_train/margins": -0.25121617317199707, + "rewards_train/rejected": -3.030303955078125, + "step": 499 + }, + { + "epoch": 0.14, + "learning_rate": 1.8794737512064888e-06, + "loss": 0.4242, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -22.803180694580078, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -5.599236488342285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03218192979693413, + "rewards_train/margins": -0.029769420623779297, + "rewards_train/rejected": 0.061951350420713425, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -176.87393188476562, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -159.55780029296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.387393474578857, + "rewards_train/margins": -0.8316135406494141, + "rewards_train/rejected": -5.555779933929443, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -129.02212524414062, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -214.83560180664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4522125720977783, + "rewards_train/margins": 5.381347417831421, + "rewards_train/rejected": -8.8335599899292, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -54.81737518310547, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -140.85421752929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2817375361919403, + "rewards_train/margins": 5.503684312105179, + "rewards_train/rejected": -5.785421848297119, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -21.985124588012695, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -9.660385131835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0797624588012695, + "rewards_train/margins": -0.9387239366769791, + "rewards_train/rejected": -0.14103852212429047, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.11212158203125, + "logps_train/ref_chosen": -1.65625, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -11.831727981567383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24558715522289276, + "rewards_train/margins": 0.4250856786966324, + "rewards_train/rejected": -0.6706728339195251, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -106.6102294921875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -128.5699462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6610230207443237, + "rewards_train/margins": 0.2959716320037842, + "rewards_train/rejected": -1.956994652748108, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -75.1527099609375, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -90.14018249511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.065271019935608, + "rewards_train/margins": 0.5987472534179688, + "rewards_train/rejected": -1.6640182733535767, + "step": 500 + }, + { + "epoch": 0.14, + "logps_train/chosen": -27.718215942382812, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -28.057584762573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4093216061592102, + "rewards_train/margins": 0.22143685817718506, + "rewards_train/rejected": -0.6307584643363953, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.519129276275635, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -18.986331939697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05503792688250542, + "rewards_train/margins": 0.3748452849686146, + "rewards_train/rejected": -0.42988321185112, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -204.70994567871094, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -259.28509521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.870994567871094, + "rewards_train/margins": 4.857515335083008, + "rewards_train/rejected": -9.728509902954102, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -199.81288146972656, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -189.5419921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.981288194656372, + "rewards_train/margins": -0.3270888328552246, + "rewards_train/rejected": -2.6541993618011475, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -66.32290649414062, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -106.29116821289062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0822906494140625, + "rewards_train/margins": -1.103173827752471, + "rewards_train/rejected": 0.02088317833840847, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -7.334289073944092, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -9.358956336975098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05842890962958336, + "rewards_train/margins": 0.3743417300283909, + "rewards_train/rejected": -0.43277063965797424, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -187.49693298339844, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -255.07958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.749693393707275, + "rewards_train/margins": 3.8582653999328613, + "rewards_train/rejected": -9.607958793640137, + "step": 501 + }, + { + "epoch": 0.14, + "logps_train/chosen": -96.10724639892578, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -107.23333740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8607246279716492, + "rewards_train/margins": 1.0626091361045837, + "rewards_train/rejected": -1.923333764076233, + "step": 501 + }, + { + "epoch": 0.14, + "learning_rate": 1.8782115315446617e-06, + "loss": 0.5562, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -76.27957153320312, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -106.79893493652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7779571413993835, + "rewards_train/margins": 3.726936638355255, + "rewards_train/rejected": -4.504893779754639, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -50.73139953613281, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -174.18670654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5231399536132812, + "rewards_train/margins": 2.3955307006835938, + "rewards_train/rejected": -3.918670654296875, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -7.63092041015625, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -14.380290985107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06815796345472336, + "rewards_train/margins": 0.6499370858073235, + "rewards_train/rejected": -0.5817791223526001, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -8.185811042785645, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -3.71875, + "logps_train/rejected": -4.0123677253723145, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05295610427856445, + "rewards_train/margins": -0.023594330996274948, + "rewards_train/rejected": -0.029361773282289505, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -64.13633728027344, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -104.72508239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08636627346277237, + "rewards_train/margins": 1.5088745132088661, + "rewards_train/rejected": -1.4225082397460938, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -10.49215030670166, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -8.454327583312988, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.333590030670166, + "rewards_train/margins": -0.29440727084875107, + "rewards_train/rejected": -0.03918275982141495, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -186.8266143798828, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -154.66017150878906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7826614379882812, + "rewards_train/margins": -0.6166441440582275, + "rewards_train/rejected": -2.1660172939300537, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -21.346220016479492, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -59.87590789794922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5096220374107361, + "rewards_train/margins": -0.047031253576278687, + "rewards_train/rejected": -0.4625907838344574, + "step": 502 + }, + { + "epoch": 0.14, + "logps_train/chosen": -88.92573547363281, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -87.39938354492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.4074264466762543, + "rewards_train/margins": -0.10263523459434509, + "rewards_train/rejected": 0.5100616812705994, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -35.59392166137695, + "logps_train/ref_chosen": -30.375, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -35.07194137573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5218921899795532, + "rewards_train/margins": 1.072801947593689, + "rewards_train/rejected": -1.5946941375732422, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -52.56564712524414, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -151.9445037841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1065647602081299, + "rewards_train/margins": 5.287885904312134, + "rewards_train/rejected": -6.394450664520264, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -81.64784240722656, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -134.89529418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9647842645645142, + "rewards_train/margins": 3.3247450590133667, + "rewards_train/rejected": -4.289529323577881, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -80.66061401367188, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -81.00237274169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7660614252090454, + "rewards_train/margins": 0.4841759204864502, + "rewards_train/rejected": -1.2502373456954956, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -147.20773315429688, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -152.6555938720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8207733631134033, + "rewards_train/margins": 0.6947860717773438, + "rewards_train/rejected": -3.515559434890747, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -51.404571533203125, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -72.01109313964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18454284965991974, + "rewards_train/margins": 1.1606521755456924, + "rewards_train/rejected": -0.9761093258857727, + "step": 503 + }, + { + "epoch": 0.14, + "logps_train/chosen": -218.95437622070312, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -189.99176025390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.795437812805176, + "rewards_train/margins": -0.6962618827819824, + "rewards_train/rejected": -7.099175930023193, + "step": 503 + }, + { + "epoch": 0.14, + "learning_rate": 1.8769431653332295e-06, + "loss": 0.4619, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -34.89993667602539, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -23.698328018188477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4899936616420746, + "rewards_train/margins": 0.5485891401767731, + "rewards_train/rejected": -1.0385828018188477, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -1.9383624792099, + "logps_train/ref_chosen": -1.578125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -7.211472511291504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03602374717593193, + "rewards_train/margins": -0.308626513928175, + "rewards_train/rejected": 0.27260276675224304, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -6.692302227020264, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -7.96938419342041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11298022419214249, + "rewards_train/margins": 0.06520820409059525, + "rewards_train/rejected": -0.17818842828273773, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.532950401306152, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -36.375938415527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13454504311084747, + "rewards_train/margins": 1.0780488699674606, + "rewards_train/rejected": -1.212593913078308, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -47.13225173950195, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -59.15941619873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6132251620292664, + "rewards_train/margins": 0.6027165055274963, + "rewards_train/rejected": -1.2159416675567627, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -106.39328002929688, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -207.57769775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7393280267715454, + "rewards_train/margins": 4.218441843986511, + "rewards_train/rejected": -5.957769870758057, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -54.19846725463867, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -144.9637451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4698467254638672, + "rewards_train/margins": 1.8265278339385986, + "rewards_train/rejected": -2.296374559402466, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -106.34288024902344, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -147.1708221435547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5842880010604858, + "rewards_train/margins": -0.06720578670501709, + "rewards_train/rejected": -1.5170822143554688, + "step": 504 + }, + { + "epoch": 0.14, + "logps_train/chosen": -16.57708740234375, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -11.164624214172363, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.370208740234375, + "rewards_train/margins": 0.09937867522239685, + "rewards_train/rejected": -0.46958741545677185, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.58143424987793, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -9.95767593383789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13314342498779297, + "rewards_train/margins": -0.2248758301138878, + "rewards_train/rejected": 0.09173240512609482, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -11.315694808959961, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -4.559372425079346, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07531948387622833, + "rewards_train/margins": 0.06655526161193848, + "rewards_train/rejected": -0.1418747454881668, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -175.604736328125, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -165.14254760742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.6104736328125, + "rewards_train/margins": -0.0462188720703125, + "rewards_train/rejected": -6.5642547607421875, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -102.05628967285156, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -98.77574157714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.15562903881073, + "rewards_train/margins": 0.8219451904296875, + "rewards_train/rejected": -1.9775742292404175, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -144.66986083984375, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -224.2942352294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.516986131668091, + "rewards_train/margins": 4.512437582015991, + "rewards_train/rejected": -7.029423713684082, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.770744800567627, + "logps_train/ref_chosen": -1.9921875, + "logps_train/ref_rejected": -0.26953125, + "logps_train/rejected": -0.18722623586654663, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07785572856664658, + "rewards_train/margins": -0.08608623035252094, + "rewards_train/rejected": 0.008230501785874367, + "step": 505 + }, + { + "epoch": 0.14, + "logps_train/chosen": -6.492229461669922, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -8.754651069641113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2742229402065277, + "rewards_train/margins": 0.24030467867851257, + "rewards_train/rejected": -0.5145276188850403, + "step": 505 + }, + { + "epoch": 0.14, + "learning_rate": 1.875668661449411e-06, + "loss": 0.5075, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.171349048614502, + "logps_train/ref_chosen": -1.59375, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -12.090707778930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2577598989009857, + "rewards_train/margins": 0.0075608789920806885, + "rewards_train/rejected": -0.2653207778930664, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -11.33987045288086, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -11.14004898071289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1464870423078537, + "rewards_train/margins": 0.1175178736448288, + "rewards_train/rejected": -0.2640049159526825, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -6.073622703552246, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -12.07240104675293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2729872763156891, + "rewards_train/margins": 0.34362784028053284, + "rewards_train/rejected": -0.6166151165962219, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.732665061950684, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -3.597972869873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07482900470495224, + "rewards_train/margins": -0.09003171790391207, + "rewards_train/rejected": 0.015202713198959827, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -25.815837860107422, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -40.76591873168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4190838038921356, + "rewards_train/margins": 1.1200081408023834, + "rewards_train/rejected": -1.539091944694519, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.318109512329102, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -30.12173080444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16931095719337463, + "rewards_train/margins": 0.46786215901374817, + "rewards_train/rejected": -0.6371731162071228, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -21.68994140625, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -18.34294891357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7877441644668579, + "rewards_train/margins": 0.07780075073242188, + "rewards_train/rejected": -0.8655449151992798, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -10.181779861450195, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -7.917222023010254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.246302992105484, + "rewards_train/margins": -0.023330792784690857, + "rewards_train/rejected": -0.22297219932079315, + "step": 506 + }, + { + "epoch": 0.14, + "logps_train/chosen": -138.76229858398438, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -163.45779418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.576230049133301, + "rewards_train/margins": 1.6695494651794434, + "rewards_train/rejected": -6.245779514312744, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -147.0680694580078, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -130.93214416503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.306807041168213, + "rewards_train/margins": -0.36359262466430664, + "rewards_train/rejected": -2.9432144165039062, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -132.02700805664062, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -170.9197998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9527008533477783, + "rewards_train/margins": 0.3392791748046875, + "rewards_train/rejected": -3.291980028152466, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -99.98870086669922, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -112.73255920410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3988702297210693, + "rewards_train/margins": -0.5256142616271973, + "rewards_train/rejected": -1.873255968093872, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -63.25559997558594, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -130.2659912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17555999755859375, + "rewards_train/margins": 4.151039123535156, + "rewards_train/rejected": -4.32659912109375, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -3.500758647918701, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -16.563880920410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2967991530895233, + "rewards_train/margins": 0.29068724485114217, + "rewards_train/rejected": 0.006111908238381147, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -22.955976486206055, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -71.12020874023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008097648620605469, + "rewards_train/margins": 3.328923225402832, + "rewards_train/rejected": -3.3370208740234375, + "step": 507 + }, + { + "epoch": 0.14, + "logps_train/chosen": -138.89598083496094, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -208.40921020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6895980834960938, + "rewards_train/margins": 5.551322937011719, + "rewards_train/rejected": -6.2409210205078125, + "step": 507 + }, + { + "epoch": 0.14, + "learning_rate": 1.8743880288133818e-06, + "loss": 0.4958, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -107.19306945800781, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -193.0, + "logps_train/rejected": -264.9656982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1693069487810135, + "rewards_train/margins": 7.027262970805168, + "rewards_train/rejected": -7.196569919586182, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -130.20144653320312, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -215.76748657226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3701446056365967, + "rewards_train/margins": 4.906604051589966, + "rewards_train/rejected": -8.276748657226562, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -109.52056121826172, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -138.98072814941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9020562171936035, + "rewards_train/margins": 1.1460165977478027, + "rewards_train/rejected": -4.048072814941406, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -97.26644897460938, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -142.5602569580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5766448974609375, + "rewards_train/margins": 2.2793807983398438, + "rewards_train/rejected": -3.8560256958007812, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -8.170822143554688, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -9.267948150634766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19833222031593323, + "rewards_train/margins": -0.0840374007821083, + "rewards_train/rejected": -0.11429481953382492, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.3299410343170166, + "logps_train/ref_chosen": -1.28125, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -7.310247421264648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10486910492181778, + "rewards_train/margins": 0.1230306401848793, + "rewards_train/rejected": -0.22789974510669708, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -18.98919677734375, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -37.014320373535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07391967624425888, + "rewards_train/margins": 0.6025123968720436, + "rewards_train/rejected": -0.6764320731163025, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -195.28082275390625, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -224.0, + "logps_train/rejected": -302.4200744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.128082275390625, + "rewards_train/margins": 1.7139253616333008, + "rewards_train/rejected": -7.842007637023926, + "step": 508 + }, + { + "epoch": 0.14, + "logps_train/chosen": -30.80866813659668, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -23.674999237060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4308668076992035, + "rewards_train/margins": 1.036633163690567, + "rewards_train/rejected": -1.4674999713897705, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -1.8385266065597534, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -9.281578063964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02994641102850437, + "rewards_train/margins": 0.16071139834821224, + "rewards_train/rejected": -0.1906578093767166, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -162.9446258544922, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -171.9715118408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.494462728500366, + "rewards_train/margins": -0.6973114013671875, + "rewards_train/rejected": -2.7971513271331787, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.8254737854003906, + "logps_train/ref_chosen": -2.328125, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -19.8615779876709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04973487928509712, + "rewards_train/margins": 0.5489229075610638, + "rewards_train/rejected": -0.5986577868461609, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -107.5776596069336, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -142.3242645263672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.007766008377075, + "rewards_train/margins": -0.42533957958221436, + "rewards_train/rejected": -1.5824264287948608, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -15.820382118225098, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -20.797595977783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7757882475852966, + "rewards_train/margins": 0.5289713740348816, + "rewards_train/rejected": -1.3047596216201782, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -87.99996948242188, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -88.56004333496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3999969959259033, + "rewards_train/margins": 0.05600738525390625, + "rewards_train/rejected": -1.4560043811798096, + "step": 509 + }, + { + "epoch": 0.14, + "logps_train/chosen": -160.89279174804688, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -210.26947021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.189279317855835, + "rewards_train/margins": 6.287667512893677, + "rewards_train/rejected": -8.476946830749512, + "step": 509 + }, + { + "epoch": 0.14, + "learning_rate": 1.873101276388213e-06, + "loss": 0.4306, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -73.60911560058594, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -67.83686065673828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1609115600585938, + "rewards_train/margins": -0.25222551822662354, + "rewards_train/rejected": -1.9086860418319702, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -12.305242538452148, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -1.5390625, + "logps_train/rejected": -12.010011672973633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06947574764490128, + "rewards_train/margins": 1.1165706887841225, + "rewards_train/rejected": -1.0470949411392212, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -149.84609985351562, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -161.64442443847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0846099853515625, + "rewards_train/margins": -0.32016754150390625, + "rewards_train/rejected": -1.7644424438476562, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -217.24905395507812, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -217.67977905273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.224905490875244, + "rewards_train/margins": -0.256927490234375, + "rewards_train/rejected": -4.967978000640869, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -90.77163696289062, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -15.153199195861816, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3271636962890625, + "rewards_train/margins": -0.8993437588214874, + "rewards_train/rejected": -0.4278199374675751, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -159.79171752929688, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -244.0, + "logps_train/rejected": -359.8480224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.379171848297119, + "rewards_train/margins": 9.205630779266357, + "rewards_train/rejected": -11.584802627563477, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -118.07403564453125, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -128.02890014648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007403564639389515, + "rewards_train/margins": 1.1454865215346217, + "rewards_train/rejected": -1.1528900861740112, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -91.43048858642578, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -179.46371459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006951141636818647, + "rewards_train/margins": 4.853322792332619, + "rewards_train/rejected": -4.846371650695801, + "step": 510 + }, + { + "epoch": 0.14, + "logps_train/chosen": -67.41453552246094, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -79.66594696044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4414535760879517, + "rewards_train/margins": 1.6501411199569702, + "rewards_train/rejected": -3.091594696044922, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.124414920806885, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -5.142767906188965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018808508291840553, + "rewards_train/margins": 0.0018352977931499481, + "rewards_train/rejected": 0.016973210498690605, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -7.545761585235596, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -10.545385360717773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21395115554332733, + "rewards_train/margins": -0.06566262245178223, + "rewards_train/rejected": -0.1482885330915451, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -66.75521087646484, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -44.38787841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4755210876464844, + "rewards_train/margins": 0.38826674222946167, + "rewards_train/rejected": -0.863787829875946, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -89.64129638671875, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -176.4729461669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3641296327114105, + "rewards_train/margins": 1.883165031671524, + "rewards_train/rejected": -2.2472946643829346, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -231.40399169921875, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -264.9999694824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.64039945602417, + "rewards_train/margins": 2.5595974922180176, + "rewards_train/rejected": -8.199996948242188, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -44.334171295166016, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -129.3968048095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7334171533584595, + "rewards_train/margins": 1.9062634706497192, + "rewards_train/rejected": -2.6396806240081787, + "step": 511 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.504417419433594, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -1.7890625, + "logps_train/rejected": -1.76223886013031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16294173896312714, + "rewards_train/margins": -0.1656241030432284, + "rewards_train/rejected": 0.0026823640801012516, + "step": 511 + }, + { + "epoch": 0.14, + "learning_rate": 1.871808413179807e-06, + "loss": 0.4742, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -114.24748229980469, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -200.3775634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3247482478618622, + "rewards_train/margins": 5.2130081951618195, + "rewards_train/rejected": -5.537756443023682, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -94.42048645019531, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -68.11634063720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007951355539262295, + "rewards_train/margins": 1.0445854431018233, + "rewards_train/rejected": -1.036634087562561, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -86.83395385742188, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -79.69134521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5833954215049744, + "rewards_train/margins": 0.035739123821258545, + "rewards_train/rejected": -0.6191345453262329, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -16.106630325317383, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -27.374496459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4606630504131317, + "rewards_train/margins": 1.0455366671085358, + "rewards_train/rejected": -1.5061997175216675, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -200.5577392578125, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -246.09817504882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.955773949623108, + "rewards_train/margins": 4.754043459892273, + "rewards_train/rejected": -6.709817409515381, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -105.92848205566406, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -136.859619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.342848300933838, + "rewards_train/margins": 2.6931138038635254, + "rewards_train/rejected": -5.035962104797363, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -102.40077209472656, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -20.385921478271484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6400773525238037, + "rewards_train/margins": -3.263985186815262, + "rewards_train/rejected": -0.37609216570854187, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -86.41216278076172, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -86.31344604492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19121627509593964, + "rewards_train/margins": -0.00987166166305542, + "rewards_train/rejected": -0.18134461343288422, + "step": 512 + }, + { + "epoch": 0.14, + "logps_train/chosen": -7.677071571350098, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -29.194446563720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3302071690559387, + "rewards_train/margins": 0.7892375588417053, + "rewards_train/rejected": -1.119444727897644, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -26.591190338134766, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -28.1926326751709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2091190367937088, + "rewards_train/margins": 0.67264424264431, + "rewards_train/rejected": -0.8817632794380188, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -48.5487060546875, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -48.84845733642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02012939564883709, + "rewards_train/margins": 0.029975129291415215, + "rewards_train/rejected": -0.009845733642578125, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -11.707399368286133, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -22.565109252929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23551006615161896, + "rewards_train/margins": 0.8295210152864456, + "rewards_train/rejected": -0.5940109491348267, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.676860809326172, + "logps_train/ref_chosen": -4.0, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -56.64912033081055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1676860898733139, + "rewards_train/margins": 0.5972259789705276, + "rewards_train/rejected": -0.7649120688438416, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -19.4825439453125, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -10.926220893859863, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12674561142921448, + "rewards_train/margins": 0.2256176993250847, + "rewards_train/rejected": -0.09887208789587021, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -65.59488677978516, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -60.27996063232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15948867797851562, + "rewards_train/margins": 1.1185073852539062, + "rewards_train/rejected": -1.2779960632324219, + "step": 513 + }, + { + "epoch": 0.14, + "logps_train/chosen": -99.0791015625, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -149.04136657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.85791015625, + "rewards_train/margins": 2.096226692199707, + "rewards_train/rejected": -4.954136848449707, + "step": 513 + }, + { + "epoch": 0.14, + "learning_rate": 1.8705094482368358e-06, + "loss": 0.5373, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -143.67010498046875, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -108.95512390136719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.767010450363159, + "rewards_train/margins": -1.1714980602264404, + "rewards_train/rejected": -2.5955123901367188, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -2.5824530124664307, + "logps_train/ref_chosen": -3.015625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -7.455594062805176, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043317198753356934, + "rewards_train/margins": 0.23575161397457123, + "rewards_train/rejected": -0.1924344152212143, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.20862865447998, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -4.070631980895996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22288714349269867, + "rewards_train/margins": 0.1705753393471241, + "rewards_train/rejected": 0.05231180414557457, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -41.36165237426758, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -18.793432235717773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4361652433872223, + "rewards_train/margins": -0.025572001934051514, + "rewards_train/rejected": -0.4105932414531708, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -17.059595108032227, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -21.133703231811523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6997095346450806, + "rewards_train/margins": -0.011339187622070312, + "rewards_train/rejected": -0.6883703470230103, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -17.425277709960938, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -11.646112442016602, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6362777948379517, + "rewards_train/margins": -0.284166544675827, + "rewards_train/rejected": -0.35211125016212463, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -3.64638352394104, + "logps_train/ref_chosen": -1.765625, + "logps_train/ref_rejected": -2.5, + "logps_train/rejected": -3.416095733642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18807585537433624, + "rewards_train/margins": -0.09646628051996231, + "rewards_train/rejected": -0.09160957485437393, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -83.69630432128906, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -210.42337036132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01963043212890625, + "rewards_train/margins": 7.622706890106201, + "rewards_train/rejected": -7.642337322235107, + "step": 514 + }, + { + "epoch": 0.14, + "logps_train/chosen": -90.98180389404297, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -76.97791290283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.148180365562439, + "rewards_train/margins": 0.22461092472076416, + "rewards_train/rejected": -1.3727912902832031, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -34.86833572387695, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -12.293108940124512, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2631664276123047, + "rewards_train/margins": 0.9549773335456848, + "rewards_train/rejected": -0.6918109059333801, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -7.363579750061035, + "logps_train/ref_chosen": -6.90625, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -9.911310195922852, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.045732975006103516, + "rewards_train/margins": 0.25164803862571716, + "rewards_train/rejected": -0.2973810136318207, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.455134391784668, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -9.8876371383667, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11426343768835068, + "rewards_train/margins": 0.2651252821087837, + "rewards_train/rejected": -0.3793887197971344, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -60.55686950683594, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -59.45045852661133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16931305825710297, + "rewards_train/margins": 0.289358913898468, + "rewards_train/rejected": -0.12004585564136505, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -115.68902587890625, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -177.01739501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.268902778625488, + "rewards_train/margins": 0.6328368186950684, + "rewards_train/rejected": -4.901739597320557, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -118.60231018066406, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -133.1228485107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3602311611175537, + "rewards_train/margins": 2.7020537853240967, + "rewards_train/rejected": -5.06228494644165, + "step": 515 + }, + { + "epoch": 0.14, + "logps_train/chosen": -137.78106689453125, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -144.8182373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.678106665611267, + "rewards_train/margins": 2.8537169694900513, + "rewards_train/rejected": -4.531823635101318, + "step": 515 + }, + { + "epoch": 0.14, + "learning_rate": 1.8692043906506766e-06, + "loss": 0.5499, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -111.46769714355469, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -106.0634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5967697501182556, + "rewards_train/margins": 0.4595779776573181, + "rewards_train/rejected": -1.0563477277755737, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -23.187854766845703, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -12.72716236114502, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1437854766845703, + "rewards_train/margins": 0.15393075346946716, + "rewards_train/rejected": -0.2977162301540375, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -17.200634002685547, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -3.7965173721313477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04243659973144531, + "rewards_train/margins": 0.05646333750337362, + "rewards_train/rejected": -0.01402673777192831, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -147.79293823242188, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -186.40875244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.629293918609619, + "rewards_train/margins": 0.41158151626586914, + "rewards_train/rejected": -4.040875434875488, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -154.83758544921875, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -155.0061492919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0837585926055908, + "rewards_train/margins": 2.666856288909912, + "rewards_train/rejected": -3.750614881515503, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -226.42327880859375, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -168.75946044921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.842328071594238, + "rewards_train/margins": -2.6663818359375, + "rewards_train/rejected": -5.175946235656738, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -67.87732696533203, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -71.3170394897461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.71273273229599, + "rewards_train/margins": 0.6189712882041931, + "rewards_train/rejected": -1.331704020500183, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -99.59738159179688, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -116.92485046386719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0097382068634033, + "rewards_train/margins": -0.26725316047668457, + "rewards_train/rejected": -0.7424850463867188, + "step": 516 + }, + { + "epoch": 0.14, + "logps_train/chosen": -122.62075805664062, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -161.7173309326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.762075901031494, + "rewards_train/margins": 2.809657096862793, + "rewards_train/rejected": -5.571732997894287, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -19.8917293548584, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -19.883968353271484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0329229831695557, + "rewards_train/margins": -0.18202614784240723, + "rewards_train/rejected": -0.8508968353271484, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -24.19692611694336, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -20.72534942626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4946926236152649, + "rewards_train/margins": 0.2528423070907593, + "rewards_train/rejected": -0.7475349307060242, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -82.94503021240234, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -103.4773178100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5445030331611633, + "rewards_train/margins": 1.9032287001609802, + "rewards_train/rejected": -2.4477317333221436, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -20.36317253112793, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -55.10833740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4863172471523285, + "rewards_train/margins": 0.8995164930820465, + "rewards_train/rejected": -1.385833740234375, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -173.73971557617188, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -275.17755126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9739716053009033, + "rewards_train/margins": 6.743783712387085, + "rewards_train/rejected": -9.717755317687988, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -18.439620971679688, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -18.02213478088379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.143962100148201, + "rewards_train/margins": 0.3957514017820358, + "rewards_train/rejected": -0.5397135019302368, + "step": 517 + }, + { + "epoch": 0.14, + "logps_train/chosen": -9.554454803466797, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -38.282894134521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22107048332691193, + "rewards_train/margins": 0.28221891820430756, + "rewards_train/rejected": -0.5032894015312195, + "step": 517 + }, + { + "epoch": 0.14, + "learning_rate": 1.867893249555349e-06, + "loss": 0.5829, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -12.650216102600098, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -17.24898910522461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3587716221809387, + "rewards_train/margins": 0.4473773241043091, + "rewards_train/rejected": -0.8061489462852478, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -81.80582427978516, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -53.14920425415039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030582427978515625, + "rewards_train/margins": 0.4593380093574524, + "rewards_train/rejected": -0.489920437335968, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -5.548550128936768, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -14.78581428527832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23923002183437347, + "rewards_train/margins": 0.3456013947725296, + "rewards_train/rejected": -0.5848314166069031, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -17.247055053710938, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -23.196626663208008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13720551133155823, + "rewards_train/margins": 0.157457172870636, + "rewards_train/rejected": -0.2946626842021942, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -36.85477828979492, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -12.053653717041016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8354778289794922, + "rewards_train/margins": -0.2519874572753906, + "rewards_train/rejected": -0.5834903717041016, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -21.332096099853516, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -27.541908264160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6519595980644226, + "rewards_train/margins": -0.04776877164840698, + "rewards_train/rejected": -0.6041908264160156, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -11.465479850769043, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -8.511281967163086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5559229850769043, + "rewards_train/margins": -0.28604477643966675, + "rewards_train/rejected": -0.26987820863723755, + "step": 518 + }, + { + "epoch": 0.14, + "logps_train/chosen": -4.032774925231934, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -4.379565715789795, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04984750971198082, + "rewards_train/margins": 0.12686658278107643, + "rewards_train/rejected": -0.07701907306909561, + "step": 518 + }, + { + "epoch": 0.15, + "logps_train/chosen": -143.27256774902344, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -214.5108642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.027256727218628, + "rewards_train/margins": 4.723829984664917, + "rewards_train/rejected": -6.751086711883545, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.051255226135254, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -36.172996520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09262552112340927, + "rewards_train/margins": 0.34967414289712906, + "rewards_train/rejected": -0.44229966402053833, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -13.369384765625, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -16.644943237304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7369384765625, + "rewards_train/margins": -0.6599441543221474, + "rewards_train/rejected": -0.07699432224035263, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -4.3727126121521, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -1.84375, + "logps_train/rejected": -2.9665043354034424, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0966462641954422, + "rewards_train/margins": 0.015629172325134277, + "rewards_train/rejected": -0.11227543652057648, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -177.18492126464844, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -167.400390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8184921741485596, + "rewards_train/margins": -0.4784531593322754, + "rewards_train/rejected": -3.340039014816284, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -28.712385177612305, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -30.631635665893555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8587385416030884, + "rewards_train/margins": -0.27057498693466187, + "rewards_train/rejected": -0.5881635546684265, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -73.23207092285156, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -99.6370849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8732070922851562, + "rewards_train/margins": 0.34050142765045166, + "rewards_train/rejected": -1.213708519935608, + "step": 519 + }, + { + "epoch": 0.15, + "logps_train/chosen": -95.199951171875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -33.383262634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5199951529502869, + "rewards_train/margins": 0.6433311104774475, + "rewards_train/rejected": -1.1633262634277344, + "step": 519 + }, + { + "epoch": 0.15, + "learning_rate": 1.8665760341274504e-06, + "loss": 0.6395, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -38.829132080078125, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -12.74751091003418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2079132795333862, + "rewards_train/margins": -0.3612871766090393, + "rewards_train/rejected": -0.8466261029243469, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -4.253960609436035, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -1.5, + "logps_train/rejected": -3.115514039993286, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024603938683867455, + "rewards_train/margins": 0.18615533970296383, + "rewards_train/rejected": -0.16155140101909637, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -76.40428161621094, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -151.28250122070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9404281377792358, + "rewards_train/margins": 3.287821888923645, + "rewards_train/rejected": -5.228250026702881, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -160.99502563476562, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -222.91253662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.8495025634765625, + "rewards_train/margins": 1.5417513847351074, + "rewards_train/rejected": -7.39125394821167, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.705928087234497, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -3.46875, + "logps_train/rejected": -5.589993476867676, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05965530872344971, + "rewards_train/margins": 0.15246903896331787, + "rewards_train/rejected": -0.21212434768676758, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -186.53414916992188, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -204.70082092285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1534149646759033, + "rewards_train/margins": 2.516667127609253, + "rewards_train/rejected": -5.670082092285156, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.377766609191895, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -28.201644897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1497233361005783, + "rewards_train/margins": 1.057387813925743, + "rewards_train/rejected": -0.9076644778251648, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -44.132625579833984, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -18.751792907714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2632625699043274, + "rewards_train/margins": 0.2431667447090149, + "rewards_train/rejected": -0.5064293146133423, + "step": 520 + }, + { + "epoch": 0.15, + "logps_train/chosen": -6.048638343811035, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -7.317229270935059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026738835498690605, + "rewards_train/margins": 0.5065466035157442, + "rewards_train/rejected": -0.5332854390144348, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.968966007232666, + "logps_train/ref_chosen": -1.75, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -19.090808868408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12189660221338272, + "rewards_train/margins": -0.2628157213330269, + "rewards_train/rejected": 0.14091911911964417, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -47.20355224609375, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -45.63795471191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32035523653030396, + "rewards_train/margins": 0.4684402346611023, + "rewards_train/rejected": -0.7887954711914062, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -78.49535369873047, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -70.56896209716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9995353817939758, + "rewards_train/margins": -0.4926391839981079, + "rewards_train/rejected": -0.5068961977958679, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -125.5336685180664, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -169.48280334472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7533668875694275, + "rewards_train/margins": 3.994913637638092, + "rewards_train/rejected": -4.7482805252075195, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -44.49434280395508, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -42.62336349487305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3244342803955078, + "rewards_train/margins": -0.09959793090820312, + "rewards_train/rejected": -1.2248363494873047, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.0102343559265137, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -17.45244026184082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25835156440734863, + "rewards_train/margins": 0.6973455846309662, + "rewards_train/rejected": -0.43899402022361755, + "step": 521 + }, + { + "epoch": 0.15, + "logps_train/chosen": -49.955955505371094, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -110.61283111572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6705955862998962, + "rewards_train/margins": 1.8906875252723694, + "rewards_train/rejected": -2.5612831115722656, + "step": 521 + }, + { + "epoch": 0.15, + "learning_rate": 1.8652527535860921e-06, + "loss": 0.4605, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -4.49936580657959, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -21.462326049804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09693842381238937, + "rewards_train/margins": 0.5681710466742516, + "rewards_train/rejected": -0.4712326228618622, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -96.64727783203125, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -214.4259033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7147278189659119, + "rewards_train/margins": 6.02786260843277, + "rewards_train/rejected": -6.742590427398682, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.906608581542969, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -22.242555618286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0030891417991369963, + "rewards_train/margins": 0.5398447036277503, + "rewards_train/rejected": -0.5367555618286133, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -197.2205352783203, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -236.52279663085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7220535278320312, + "rewards_train/margins": 1.1302261352539062, + "rewards_train/rejected": -3.8522796630859375, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.817112445831299, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -13.55308723449707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04733624681830406, + "rewards_train/margins": 0.45172247663140297, + "rewards_train/rejected": -0.49905872344970703, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -108.79080963134766, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -180.40391540527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07091903686523438, + "rewards_train/margins": 6.41131067276001, + "rewards_train/rejected": -6.340391635894775, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.11158426105976105, + "logps_train/ref_chosen": -0.12158203125, + "logps_train/ref_rejected": -0.12158203125, + "logps_train/rejected": -0.11096113920211792, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.000999776995740831, + "rewards_train/margins": -6.231223233044147e-05, + "rewards_train/rejected": 0.0010620892280712724, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.5419859886169434, + "logps_train/ref_chosen": -0.50390625, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -10.090102195739746, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.003807973815128207, + "rewards_train/margins": 0.35207726364023983, + "rewards_train/rejected": -0.35588523745536804, + "step": 522 + }, + { + "epoch": 0.15, + "logps_train/chosen": -77.90528869628906, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -121.73439025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2905288934707642, + "rewards_train/margins": 2.2829102277755737, + "rewards_train/rejected": -3.573439121246338, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -170.90350341796875, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -199.8731689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.890350341796875, + "rewards_train/margins": 2.1969666481018066, + "rewards_train/rejected": -3.0873169898986816, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -177.22817993164062, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -230.43072509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0228179693222046, + "rewards_train/margins": 2.320254683494568, + "rewards_train/rejected": -3.3430726528167725, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -60.118865966796875, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -85.83056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5118865966796875, + "rewards_train/margins": 0.7211700677871704, + "rewards_train/rejected": -1.233056664466858, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -96.91477966308594, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -97.18936157226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2914779782295227, + "rewards_train/margins": 0.02745819091796875, + "rewards_train/rejected": -0.31893616914749146, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -13.669137001037598, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -15.449983596801758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.135663703083992, + "rewards_train/margins": 0.4030846804380417, + "rewards_train/rejected": -0.5387483835220337, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -126.84918212890625, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -188.669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7849183082580566, + "rewards_train/margins": 4.632073879241943, + "rewards_train/rejected": -8.4169921875, + "step": 523 + }, + { + "epoch": 0.15, + "logps_train/chosen": -126.58390045166016, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -173.19195556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1583900451660156, + "rewards_train/margins": 3.3108057975769043, + "rewards_train/rejected": -5.46919584274292, + "step": 523 + }, + { + "epoch": 0.15, + "learning_rate": 1.863923417192835e-06, + "loss": 0.3024, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -18.043468475341797, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -36.37140655517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2855968475341797, + "rewards_train/margins": 1.0140438079833984, + "rewards_train/rejected": -1.2996406555175781, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -50.06275177001953, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -15.866909980773926, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.356275200843811, + "rewards_train/margins": -0.969584196805954, + "rewards_train/rejected": -0.38669100403785706, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -111.3115463256836, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -156.78465270996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8311545848846436, + "rewards_train/margins": 2.647310972213745, + "rewards_train/rejected": -5.478465557098389, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.240279197692871, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -22.466205596923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04902791976928711, + "rewards_train/margins": 0.9725927114486694, + "rewards_train/rejected": -1.0216206312179565, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -84.47087860107422, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -233.76516723632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.047087859362363815, + "rewards_train/margins": 8.629428673535585, + "rewards_train/rejected": -8.67651653289795, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -60.11676025390625, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -0.84765625, + "logps_train/rejected": -10.88662052154541, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.886676013469696, + "rewards_train/margins": 0.11722046136856079, + "rewards_train/rejected": -1.0038964748382568, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -14.957351684570312, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -7.880884647369385, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6082351803779602, + "rewards_train/margins": -0.15608420968055725, + "rewards_train/rejected": -0.45215097069740295, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -82.66934967041016, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -91.02436828613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23306503891944885, + "rewards_train/margins": 0.18550186604261398, + "rewards_train/rejected": 0.04756317287683487, + "step": 524 + }, + { + "epoch": 0.15, + "logps_train/chosen": -97.0313491821289, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -105.48519134521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10313492268323898, + "rewards_train/margins": 0.2953842058777809, + "rewards_train/rejected": -0.3985191285610199, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -64.37132263183594, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -118.52600860595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06286773830652237, + "rewards_train/margins": 1.4154686704277992, + "rewards_train/rejected": -1.3526009321212769, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.01926371455192566, + "logps_train/ref_chosen": -0.0654296875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -9.644213676452637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004616597201675177, + "rewards_train/margins": 0.3002879708074033, + "rewards_train/rejected": -0.29567137360572815, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -163.55726623535156, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -158.33889770507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.75572669506073, + "rewards_train/margins": 0.87816321849823, + "rewards_train/rejected": -2.63388991355896, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -123.23786926269531, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -122.98790740966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3237869441509247, + "rewards_train/margins": -0.024996191263198853, + "rewards_train/rejected": -0.29879075288772583, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -5.596360206604004, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -22.155536651611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08463602513074875, + "rewards_train/margins": 0.10591764003038406, + "rewards_train/rejected": -0.1905536651611328, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -211.14422607421875, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -217.498779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.314422607421875, + "rewards_train/margins": 0.8354554176330566, + "rewards_train/rejected": -6.149878025054932, + "step": 525 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.100327491760254, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -1.140625, + "logps_train/rejected": -0.9852952361106873, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.005592250730842352, + "rewards_train/margins": -0.009940726216882467, + "rewards_train/rejected": 0.01553297694772482, + "step": 525 + }, + { + "epoch": 0.15, + "learning_rate": 1.8625880342516247e-06, + "loss": 0.5052, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -133.55142211914062, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -241.87843322753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8051422834396362, + "rewards_train/margins": 5.282701134681702, + "rewards_train/rejected": -7.087843418121338, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -23.594816207885742, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -34.06105422973633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6594816446304321, + "rewards_train/margins": 0.9341237545013428, + "rewards_train/rejected": -1.593605399131775, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -110.09095764160156, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -236.9108428955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.559095859527588, + "rewards_train/margins": 7.93198823928833, + "rewards_train/rejected": -10.491084098815918, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -121.22261047363281, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -147.56463623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7222610712051392, + "rewards_train/margins": 1.7842026948928833, + "rewards_train/rejected": -3.5064637660980225, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -7.283665657043457, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -11.484553337097168, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16586656868457794, + "rewards_train/margins": -0.21741123497486115, + "rewards_train/rejected": 0.0515446662902832, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -133.23519897460938, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -181.2422637939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1735198497772217, + "rewards_train/margins": 6.000706911087036, + "rewards_train/rejected": -8.174226760864258, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -174.24232482910156, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -172.4936065673828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7242324352264404, + "rewards_train/margins": -0.17487168312072754, + "rewards_train/rejected": -2.549360752105713, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -121.22608947753906, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -149.6814422607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.522608995437622, + "rewards_train/margins": 3.3455355167388916, + "rewards_train/rejected": -4.868144512176514, + "step": 526 + }, + { + "epoch": 0.15, + "logps_train/chosen": -47.5557861328125, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -135.93890380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13057862222194672, + "rewards_train/margins": 0.8633117824792862, + "rewards_train/rejected": -0.9938904047012329, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -150.2325897216797, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -166.8333282470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7232589721679688, + "rewards_train/margins": 0.8600739240646362, + "rewards_train/rejected": -1.583332896232605, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -9.169991493225098, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -1.4765625, + "logps_train/rejected": -2.6174557209014893, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07675085216760635, + "rewards_train/margins": 0.19084017723798752, + "rewards_train/rejected": -0.11408932507038116, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -94.67703247070312, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -127.08516693115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5677032470703125, + "rewards_train/margins": 2.7408134937286377, + "rewards_train/rejected": -3.30851674079895, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -5.296089172363281, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -17.435562133789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3233589231967926, + "rewards_train/margins": 0.22644731402397156, + "rewards_train/rejected": -0.5498062372207642, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -13.502881050109863, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -28.376550674438477, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3877881169319153, + "rewards_train/margins": -0.1626330465078354, + "rewards_train/rejected": -0.2251550704240799, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -65.02845001220703, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -112.63471984863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24715499579906464, + "rewards_train/margins": 1.1106269806623459, + "rewards_train/rejected": -0.8634719848632812, + "step": 527 + }, + { + "epoch": 0.15, + "logps_train/chosen": -14.920656204223633, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -10.052528381347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25456562638282776, + "rewards_train/margins": 0.19756221771240234, + "rewards_train/rejected": -0.4521278440952301, + "step": 527 + }, + { + "epoch": 0.15, + "learning_rate": 1.861246614108726e-06, + "loss": 0.3592, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.227754592895508, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -21.240690231323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6680879592895508, + "rewards_train/margins": 0.3997310400009155, + "rewards_train/rejected": -1.0678189992904663, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -4.345048427581787, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -14.531111717224121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24674515426158905, + "rewards_train/margins": 0.5311063379049301, + "rewards_train/rejected": -0.28436118364334106, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -127.80940246582031, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -216.2201385498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7309402823448181, + "rewards_train/margins": 7.4910741448402405, + "rewards_train/rejected": -8.222014427185059, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -137.3450469970703, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -198.43099975585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9845046997070312, + "rewards_train/margins": 0.7585952281951904, + "rewards_train/rejected": -2.7430999279022217, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -9.106821060180664, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -5.533977031707764, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11693210899829865, + "rewards_train/margins": -0.0729094035923481, + "rewards_train/rejected": -0.044022705405950546, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -194.50531005859375, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -175.13539123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.250530958175659, + "rewards_train/margins": 0.06300830841064453, + "rewards_train/rejected": -3.3135392665863037, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -143.9382781982422, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -233.02853393554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3938279151916504, + "rewards_train/margins": 7.309025287628174, + "rewards_train/rejected": -9.702853202819824, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -21.421939849853516, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -22.383224487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0046939849853515625, + "rewards_train/margins": 0.5836284756660461, + "rewards_train/rejected": -0.5883224606513977, + "step": 528 + }, + { + "epoch": 0.15, + "logps_train/chosen": -7.369909286499023, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -7.111730575561523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15261593461036682, + "rewards_train/margins": 0.18043214082717896, + "rewards_train/rejected": -0.3330480754375458, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -175.96383666992188, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -228.6601104736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.846383571624756, + "rewards_train/margins": 4.819627285003662, + "rewards_train/rejected": -9.666010856628418, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -147.98287963867188, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -163.46755981445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4482879638671875, + "rewards_train/margins": 0.3484683036804199, + "rewards_train/rejected": -4.796756267547607, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -237.45664978027344, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -255.87841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.045665264129639, + "rewards_train/margins": 0.8421764373779297, + "rewards_train/rejected": -4.887841701507568, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -101.7296142578125, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -26.568443298339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.422961473464966, + "rewards_train/margins": -2.3411171436309814, + "rewards_train/rejected": -0.08184432983398438, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -68.42650604248047, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -1.3984375, + "logps_train/rejected": -12.149373054504395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7926506400108337, + "rewards_train/margins": 0.28244298696517944, + "rewards_train/rejected": -1.0750936269760132, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -166.20880126953125, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -167.5623779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07911987602710724, + "rewards_train/margins": 1.4353576451539993, + "rewards_train/rejected": -1.356237769126892, + "step": 529 + }, + { + "epoch": 0.15, + "logps_train/chosen": -15.006548881530762, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -12.430831909179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1618451178073883, + "rewards_train/margins": 0.3486783057451248, + "rewards_train/rejected": -0.1868331879377365, + "step": 529 + }, + { + "epoch": 0.15, + "learning_rate": 1.8598991661526568e-06, + "loss": 0.5281, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -42.180503845214844, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -48.116180419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34305039048194885, + "rewards_train/margins": 1.0435676276683807, + "rewards_train/rejected": -1.3866180181503296, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.900524139404297, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -18.879791259765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1088024154305458, + "rewards_train/margins": -0.08332328870892525, + "rewards_train/rejected": -0.02547912672162056, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.53859806060791, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -13.417531967163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014890193939208984, + "rewards_train/margins": 0.27539339661598206, + "rewards_train/rejected": -0.26050320267677307, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -14.351147651672363, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -10.941691398620605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20386476814746857, + "rewards_train/margins": 0.312179371714592, + "rewards_train/rejected": -0.5160441398620605, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.486567735671997, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -3.328125, + "logps_train/rejected": -4.206040859222412, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016968226060271263, + "rewards_train/margins": 0.10475981049239635, + "rewards_train/rejected": -0.08779158443212509, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -20.70282554626465, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -37.28452682495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1672174483537674, + "rewards_train/margins": 0.6456701308488846, + "rewards_train/rejected": -0.4784526824951172, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -182.92010498046875, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -155.79779052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.29201078414917, + "rewards_train/margins": 0.7877683639526367, + "rewards_train/rejected": -5.079779148101807, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -97.8069076538086, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -216.71798706054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4306907653808594, + "rewards_train/margins": 7.14110803604126, + "rewards_train/rejected": -7.571798801422119, + "step": 530 + }, + { + "epoch": 0.15, + "logps_train/chosen": -120.37870025634766, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -160.6511993408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.73786997795105, + "rewards_train/margins": 0.4272499084472656, + "rewards_train/rejected": -3.1651198863983154, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -5.84698486328125, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -6.682387351989746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.269073486328125, + "rewards_train/margins": -0.003959745168685913, + "rewards_train/rejected": -0.2651137411594391, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -66.06000518798828, + "logps_train/ref_chosen": -43.75, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -70.14375305175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2310006618499756, + "rewards_train/margins": 0.13337469100952148, + "rewards_train/rejected": -2.364375352859497, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.8574683666229248, + "logps_train/ref_chosen": -1.15625, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -8.873189926147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02987816371023655, + "rewards_train/margins": 0.4484471622854471, + "rewards_train/rejected": -0.41856899857521057, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -44.57162857055664, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -73.79622650146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.882162868976593, + "rewards_train/margins": 0.9724598526954651, + "rewards_train/rejected": -1.854622721672058, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -126.87370300292969, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -175.0404052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.037370443344116, + "rewards_train/margins": 2.766669988632202, + "rewards_train/rejected": -5.804040431976318, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -21.789133071899414, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -35.150577545166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7414132952690125, + "rewards_train/margins": 0.423644483089447, + "rewards_train/rejected": -1.1650577783584595, + "step": 531 + }, + { + "epoch": 0.15, + "logps_train/chosen": -211.18057250976562, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -235.8966827392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.018057346343994, + "rewards_train/margins": 2.9716110229492188, + "rewards_train/rejected": -6.989668369293213, + "step": 531 + }, + { + "epoch": 0.15, + "learning_rate": 1.858545699814125e-06, + "loss": 0.4279, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -67.79231262207031, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -104.90904235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6792312860488892, + "rewards_train/margins": 1.1616729497909546, + "rewards_train/rejected": -1.8409042358398438, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -93.6684799194336, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -194.28494262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6168479919433594, + "rewards_train/margins": 4.911646366119385, + "rewards_train/rejected": -6.528494358062744, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -252.09793090820312, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -217.5211639404297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.809792995452881, + "rewards_train/margins": -1.3576765060424805, + "rewards_train/rejected": -5.4521164894104, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -109.69519805908203, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -181.58465576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1695198118686676, + "rewards_train/margins": 5.638945668935776, + "rewards_train/rejected": -5.808465480804443, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -143.51571655273438, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -164.33560180664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.601571559906006, + "rewards_train/margins": 1.4319887161254883, + "rewards_train/rejected": -6.033560276031494, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -122.53379821777344, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -276.0667724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.803379774093628, + "rewards_train/margins": 9.403297662734985, + "rewards_train/rejected": -12.206677436828613, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -15.67917251586914, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -14.974004745483398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18666724860668182, + "rewards_train/margins": -0.01426677405834198, + "rewards_train/rejected": -0.17240047454833984, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -152.9761962890625, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -186.99822998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.297619581222534, + "rewards_train/margins": 1.8022034168243408, + "rewards_train/rejected": -4.099822998046875, + "step": 532 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.663978576660156, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -7.243608474731445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2663978636264801, + "rewards_train/margins": 0.014212995767593384, + "rewards_train/rejected": -0.2806108593940735, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -208.17782592773438, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -36.719200134277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8177826404571533, + "rewards_train/margins": -2.933362603187561, + "rewards_train/rejected": -0.8844200372695923, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -68.08224487304688, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -6.5500922203063965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4332245588302612, + "rewards_train/margins": -1.5032153353095055, + "rewards_train/rejected": 0.06999077647924423, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.07367724925279617, + "logps_train/ref_chosen": -0.1611328125, + "logps_train/ref_rejected": -0.1611328125, + "logps_train/rejected": -0.06771209090948105, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008745556697249413, + "rewards_train/margins": -0.0005965158343315125, + "rewards_train/rejected": 0.009342072531580925, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -14.843523979187012, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -48.3372917175293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4843524098396301, + "rewards_train/margins": 0.22437679767608643, + "rewards_train/rejected": -0.7087292075157166, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -79.58718872070312, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -89.36264038085938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.19128112494945526, + "rewards_train/margins": -0.2724548429250717, + "rewards_train/rejected": 0.463735967874527, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -147.72036743164062, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -178.3790740966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6720367670059204, + "rewards_train/margins": 3.7658709287643433, + "rewards_train/rejected": -5.437907695770264, + "step": 533 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.273866653442383, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -47.170997619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06176166608929634, + "rewards_train/margins": 0.7803381197154522, + "rewards_train/rejected": -0.8420997858047485, + "step": 533 + }, + { + "epoch": 0.15, + "learning_rate": 1.8571862245659599e-06, + "loss": 0.6778, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -6.1719183921813965, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -8.042162895202637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07656683772802353, + "rewards_train/margins": 0.2213994637131691, + "rewards_train/rejected": -0.2979663014411926, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -16.099319458007812, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -46.803340911865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10256805270910263, + "rewards_train/margins": 1.6704021915793419, + "rewards_train/rejected": -1.5678341388702393, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -116.22203826904297, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -106.3741455078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.122203826904297, + "rewards_train/margins": -0.43478918075561523, + "rewards_train/rejected": -2.6874146461486816, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -25.414705276489258, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -29.951190948486328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0102205276489258, + "rewards_train/margins": -0.215101420879364, + "rewards_train/rejected": -0.7951191067695618, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.298377752304077, + "logps_train/ref_chosen": -1.7109375, + "logps_train/ref_rejected": -0.828125, + "logps_train/rejected": -0.4100229740142822, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.058744024485349655, + "rewards_train/margins": -0.10055422782897949, + "rewards_train/rejected": 0.04181020334362984, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -78.52658081054688, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -50.5, + "logps_train/rejected": -65.376708984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5776580572128296, + "rewards_train/margins": -0.08998715877532959, + "rewards_train/rejected": -1.4876708984375, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -42.85154342651367, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -54.88142395019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.110154390335083, + "rewards_train/margins": 0.7279880046844482, + "rewards_train/rejected": -1.8381423950195312, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -114.57058715820312, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -111.41488647460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6070587635040283, + "rewards_train/margins": 0.5844299793243408, + "rewards_train/rejected": -2.191488742828369, + "step": 534 + }, + { + "epoch": 0.15, + "logps_train/chosen": -100.87305450439453, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -150.63430786132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5373055934906006, + "rewards_train/margins": 3.6261250972747803, + "rewards_train/rejected": -7.163430690765381, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -41.067176818847656, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -47.60987091064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6067177057266235, + "rewards_train/margins": 1.579269528388977, + "rewards_train/rejected": -2.1859872341156006, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -28.427772521972656, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -58.12326431274414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6802772879600525, + "rewards_train/margins": -0.09295082092285156, + "rewards_train/rejected": -0.5873264670372009, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -1.8250383138656616, + "logps_train/ref_chosen": -0.90625, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -13.398541450500488, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09187883138656616, + "rewards_train/margins": -0.07702468615025282, + "rewards_train/rejected": -0.014854145236313343, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -73.53572082519531, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -126.11498260498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05357208475470543, + "rewards_train/margins": 1.0079261995851994, + "rewards_train/rejected": -1.0614982843399048, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -46.10561752319336, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -18.028820037841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31056174635887146, + "rewards_train/margins": 0.35482028126716614, + "rewards_train/rejected": -0.6653820276260376, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -6.26836633682251, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -21.63540267944336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13308663666248322, + "rewards_train/margins": -0.16954636946320534, + "rewards_train/rejected": 0.03645973280072212, + "step": 535 + }, + { + "epoch": 0.15, + "logps_train/chosen": -4.141167640686035, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -7.464383602142334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17650823295116425, + "rewards_train/margins": 0.2541965916752815, + "rewards_train/rejected": -0.07768835872411728, + "step": 535 + }, + { + "epoch": 0.15, + "learning_rate": 1.8558207499230468e-06, + "loss": 0.5441, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -203.59222412109375, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -203.1429443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.05922269821167, + "rewards_train/margins": 0.5550718307495117, + "rewards_train/rejected": -4.614294528961182, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -5.23392915725708, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -5.224420547485352, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05151791498064995, + "rewards_train/margins": -0.0009508579969406128, + "rewards_train/rejected": -0.050567056983709335, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.652993202209473, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -6.455492973327637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38404932618141174, + "rewards_train/margins": 0.014624983072280884, + "rewards_train/rejected": -0.3986743092536926, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -7.180285930633545, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -7.038057327270508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011778593063354492, + "rewards_train/margins": 0.2639021575450897, + "rewards_train/rejected": -0.2756807506084442, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -142.39877319335938, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -178.42547607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3898773193359375, + "rewards_train/margins": 5.802670478820801, + "rewards_train/rejected": -8.192547798156738, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -89.85986328125, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -77.74797058105469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5359863638877869, + "rewards_train/margins": -0.31118930876255035, + "rewards_train/rejected": -0.2247970551252365, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -68.95928955078125, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -29.545547485351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.195928931236267, + "rewards_train/margins": -0.04137420654296875, + "rewards_train/rejected": -1.1545547246932983, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -126.96086120605469, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -266.1860046386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8960862159729004, + "rewards_train/margins": 7.92251443862915, + "rewards_train/rejected": -11.81860065460205, + "step": 536 + }, + { + "epoch": 0.15, + "logps_train/chosen": -82.6553955078125, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -174.65121459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.565539538860321, + "rewards_train/margins": 3.4995821118354797, + "rewards_train/rejected": -4.065121650695801, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -124.36038970947266, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -112.20805358886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.73603892326355, + "rewards_train/margins": 1.284766435623169, + "rewards_train/rejected": -4.020805358886719, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.418079376220703, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -16.13548469543457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21680794656276703, + "rewards_train/margins": -0.24075947701931, + "rewards_train/rejected": 0.02395153045654297, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -16.971681594848633, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -24.709556579589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6721681952476501, + "rewards_train/margins": 0.3237874507904053, + "rewards_train/rejected": -0.9959556460380554, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -130.50320434570312, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -111.24996948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.150320529937744, + "rewards_train/margins": 1.5246765613555908, + "rewards_train/rejected": -3.674997091293335, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -208.42335510253906, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -242.20986938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.442335605621338, + "rewards_train/margins": 2.6786513328552246, + "rewards_train/rejected": -7.1209869384765625, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -45.485450744628906, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -55.69050216674805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.048545073717832565, + "rewards_train/margins": 0.32050516083836555, + "rewards_train/rejected": -0.3690502345561981, + "step": 537 + }, + { + "epoch": 0.15, + "logps_train/chosen": -6.008852958679199, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -14.719285011291504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13838529586791992, + "rewards_train/margins": -0.06020679324865341, + "rewards_train/rejected": -0.07817850261926651, + "step": 537 + }, + { + "epoch": 0.15, + "learning_rate": 1.8544492854422602e-06, + "loss": 0.4471, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -29.91925048828125, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -223.0, + "logps_train/rejected": -232.85519409179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.441925048828125, + "rewards_train/margins": 0.5435943603515625, + "rewards_train/rejected": -0.9855194091796875, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -18.603118896484375, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -53.66490936279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7728118896484375, + "rewards_train/margins": 0.9186791181564331, + "rewards_train/rejected": -1.6914910078048706, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -124.8522720336914, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -95.73749542236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5852272510528564, + "rewards_train/margins": 0.5385222434997559, + "rewards_train/rejected": -2.1237494945526123, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -11.170572280883789, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -45.538795471191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.57330721616745, + "rewards_train/margins": 0.10557234287261963, + "rewards_train/rejected": -0.6788795590400696, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.943002700805664, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -17.427753448486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14742527902126312, + "rewards_train/margins": 0.3516000658273697, + "rewards_train/rejected": -0.4990253448486328, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -105.10726928710938, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -194.52557373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8607269525527954, + "rewards_train/margins": 4.79183042049408, + "rewards_train/rejected": -6.652557373046875, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -1.9240373373031616, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -4.358667373657227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009158766828477383, + "rewards_train/margins": 0.11533800419420004, + "rewards_train/rejected": -0.10617923736572266, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.485942840576172, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -7.524141311645508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0014057159423828125, + "rewards_train/margins": 0.5022573471069336, + "rewards_train/rejected": -0.5008516311645508, + "step": 538 + }, + { + "epoch": 0.15, + "logps_train/chosen": -13.695880889892578, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -17.259323120117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6289631128311157, + "rewards_train/margins": 0.3594692349433899, + "rewards_train/rejected": -0.9884323477745056, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.8289742469787598, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -13.163264274597168, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0657099261879921, + "rewards_train/margins": -0.03063349798321724, + "rewards_train/rejected": -0.03507642820477486, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.2473597526550293, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -2.3125, + "logps_train/rejected": -3.257972240447998, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12161097675561905, + "rewards_train/margins": -0.027063749730587006, + "rewards_train/rejected": -0.09454722702503204, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -227.9256591796875, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -182.61386108398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.392566204071045, + "rewards_train/margins": 0.1188201904296875, + "rewards_train/rejected": -6.511386394500732, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -184.86813354492188, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -169.73333740234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.786813497543335, + "rewards_train/margins": -1.21347975730896, + "rewards_train/rejected": -2.573333740234375, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.093557596206665, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -10.765276908874512, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08439423888921738, + "rewards_train/margins": 0.354671947658062, + "rewards_train/rejected": -0.2702777087688446, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -139.85055541992188, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -165.96929931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6350555419921875, + "rewards_train/margins": 1.2118744850158691, + "rewards_train/rejected": -3.8469300270080566, + "step": 539 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.0540566444396973, + "logps_train/ref_chosen": -0.7578125, + "logps_train/ref_rejected": -1.890625, + "logps_train/rejected": -5.122949600219727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2296244204044342, + "rewards_train/margins": 0.09360805153846741, + "rewards_train/rejected": -0.3232324719429016, + "step": 539 + }, + { + "epoch": 0.15, + "learning_rate": 1.8530718407223974e-06, + "loss": 0.566, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -18.1134090423584, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -4.563708305358887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10115909576416016, + "rewards_train/margins": 0.11377992667257786, + "rewards_train/rejected": -0.012620830908417702, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -102.82965850830078, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -163.23887634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7329658269882202, + "rewards_train/margins": 2.890921711921692, + "rewards_train/rejected": -4.623887538909912, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -34.885738372802734, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -36.10708999633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.43642616271972656, + "rewards_train/margins": 0.547135166823864, + "rewards_train/rejected": -0.11070900410413742, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -211.014892578125, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -174.57521057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8014893531799316, + "rewards_train/margins": 0.3560316562652588, + "rewards_train/rejected": -3.1575210094451904, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -81.33695983886719, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -177.33544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9836959838867188, + "rewards_train/margins": 3.849849224090576, + "rewards_train/rejected": -4.833545207977295, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -162.41851806640625, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -133.62155151367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.241851806640625, + "rewards_train/margins": -0.5296966433525085, + "rewards_train/rejected": -0.7121551632881165, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -74.85194396972656, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -113.99685668945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8351944088935852, + "rewards_train/margins": 1.3144912123680115, + "rewards_train/rejected": -2.1496856212615967, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -104.2853012084961, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -166.62107849121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07853012531995773, + "rewards_train/margins": 3.9835779145359993, + "rewards_train/rejected": -4.062108039855957, + "step": 540 + }, + { + "epoch": 0.15, + "logps_train/chosen": -19.79646110534668, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -23.8708438873291, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23285388946533203, + "rewards_train/margins": 1.1949383020401, + "rewards_train/rejected": -0.9620844125747681, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -102.90664672851562, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -130.4084014892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0906647443771362, + "rewards_train/margins": 2.000175356864929, + "rewards_train/rejected": -3.0908401012420654, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -17.843080520629883, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -30.297300338745117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0343080535531044, + "rewards_train/margins": 0.7204219922423363, + "rewards_train/rejected": -0.7547300457954407, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -104.25106811523438, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -203.30227661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4251068830490112, + "rewards_train/margins": 4.4051209688186646, + "rewards_train/rejected": -5.830227851867676, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -28.441890716552734, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -17.408344268798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4066890776157379, + "rewards_train/margins": -0.3408546522259712, + "rewards_train/rejected": -0.0658344253897667, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -71.05874633789062, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -63.77979278564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2558746337890625, + "rewards_train/margins": -0.20289535447955132, + "rewards_train/rejected": -0.052979279309511185, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -119.01777648925781, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -281.21856689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3517777919769287, + "rewards_train/margins": 9.170079469680786, + "rewards_train/rejected": -12.521857261657715, + "step": 541 + }, + { + "epoch": 0.15, + "logps_train/chosen": -1.0554778575897217, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -10.422584533691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07648346573114395, + "rewards_train/margins": 0.8843669071793556, + "rewards_train/rejected": -0.8078834414482117, + "step": 541 + }, + { + "epoch": 0.15, + "learning_rate": 1.8516884254041114e-06, + "loss": 0.3608, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -6.754397392272949, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -12.4609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3870602548122406, + "rewards_train/margins": 0.7956539988517761, + "rewards_train/rejected": -0.4085937440395355, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -63.5536994934082, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -194.0592041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9053699374198914, + "rewards_train/margins": 6.600550472736359, + "rewards_train/rejected": -7.50592041015625, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -153.11642456054688, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -182.07379150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5616424083709717, + "rewards_train/margins": 2.245736837387085, + "rewards_train/rejected": -4.807379245758057, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -144.78094482421875, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -153.66079711914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6780946254730225, + "rewards_train/margins": 2.4379851818084717, + "rewards_train/rejected": -5.116079807281494, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -97.34229278564453, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -126.21683502197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.634229302406311, + "rewards_train/margins": 2.887454390525818, + "rewards_train/rejected": -4.521683692932129, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -29.617923736572266, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -13.778565406799316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.43679237365722656, + "rewards_train/margins": -0.2589358240365982, + "rewards_train/rejected": -0.17785654962062836, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -188.66632080078125, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -193.61212158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.466632127761841, + "rewards_train/margins": 1.2945802211761475, + "rewards_train/rejected": -4.761212348937988, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -103.43669891357422, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -150.9285888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4563301205635071, + "rewards_train/margins": 3.8491891026496887, + "rewards_train/rejected": -3.3928589820861816, + "step": 542 + }, + { + "epoch": 0.15, + "logps_train/chosen": -115.41090393066406, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -188.2451629638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7910903692245483, + "rewards_train/margins": 4.933426022529602, + "rewards_train/rejected": -6.72451639175415, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -60.02318572998047, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -42.14380645751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07268142700195312, + "rewards_train/margins": 1.24956214427948, + "rewards_train/rejected": -1.1768807172775269, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -42.00749588012695, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -75.5143814086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0992504134774208, + "rewards_train/margins": 1.4506885781884193, + "rewards_train/rejected": -1.3514381647109985, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -131.86500549316406, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -188.90066528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5865005850791931, + "rewards_train/margins": 4.703566133975983, + "rewards_train/rejected": -5.290066719055176, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -225.07818603515625, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -217.85226440429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.807818412780762, + "rewards_train/margins": -0.32259178161621094, + "rewards_train/rejected": -9.48522663116455, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -75.26766967773438, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -75.55592346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12676696479320526, + "rewards_train/margins": 0.028825387358665466, + "rewards_train/rejected": -0.15559235215187073, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.6728759407997131, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -1.2109375, + "logps_train/rejected": -0.6815029382705688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.053806155920028687, + "rewards_train/margins": 0.0008626990020275116, + "rewards_train/rejected": 0.052943456918001175, + "step": 543 + }, + { + "epoch": 0.15, + "logps_train/chosen": -75.97661590576172, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -120.99281311035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4976615905761719, + "rewards_train/margins": 0.15161973237991333, + "rewards_train/rejected": -0.6492813229560852, + "step": 543 + }, + { + "epoch": 0.15, + "learning_rate": 1.8502990491698425e-06, + "loss": 0.3153, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -9.69521713256836, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -11.960104942321777, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4195217192173004, + "rewards_train/margins": 0.5030513107776642, + "rewards_train/rejected": -0.9225730299949646, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -13.060357093811035, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -11.510883331298828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18103571236133575, + "rewards_train/margins": -0.03619737923145294, + "rewards_train/rejected": -0.1448383331298828, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -78.35272216796875, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -137.00772094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.335272192955017, + "rewards_train/margins": 3.6654998064041138, + "rewards_train/rejected": -5.000771999359131, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -33.18067932128906, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -22.60030746459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5430679321289062, + "rewards_train/margins": 0.5107128620147705, + "rewards_train/rejected": -1.0537807941436768, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -72.13871002197266, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -132.5347900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4861290156841278, + "rewards_train/margins": 2.1896080672740936, + "rewards_train/rejected": -1.7034790515899658, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -29.26873016357422, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -27.514265060424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27687302231788635, + "rewards_train/margins": 0.524553507566452, + "rewards_train/rejected": -0.8014265298843384, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -133.43203735351562, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -133.783447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.293203830718994, + "rewards_train/margins": 0.0351409912109375, + "rewards_train/rejected": -3.3283448219299316, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -7.85469913482666, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -10.224265098571777, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16359491646289825, + "rewards_train/margins": 0.28070659935474396, + "rewards_train/rejected": -0.4443015158176422, + "step": 544 + }, + { + "epoch": 0.15, + "logps_train/chosen": -101.89696502685547, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -136.8478546142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3396965265274048, + "rewards_train/margins": 2.2450889348983765, + "rewards_train/rejected": -3.5847854614257812, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -28.04532814025879, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -8.331696510314941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42953282594680786, + "rewards_train/margins": 0.08957433700561523, + "rewards_train/rejected": -0.5191071629524231, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -91.15003204345703, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -93.26206970214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2650032043457031, + "rewards_train/margins": 0.21120381355285645, + "rewards_train/rejected": -1.4762070178985596, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -10.634958267211914, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -18.34282875061035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21150417625904083, + "rewards_train/margins": 0.6457870453596115, + "rewards_train/rejected": -0.4342828691005707, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -135.8948516845703, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -184.65939331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8394851684570312, + "rewards_train/margins": 5.226454257965088, + "rewards_train/rejected": -6.065939426422119, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -6.672684192657471, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -8.6181058883667, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05789342150092125, + "rewards_train/margins": 0.2226671613752842, + "rewards_train/rejected": -0.28056058287620544, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -16.745132446289062, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -17.05406951904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7870132327079773, + "rewards_train/margins": 0.08714371919631958, + "rewards_train/rejected": -0.8741569519042969, + "step": 545 + }, + { + "epoch": 0.15, + "logps_train/chosen": -97.39427947998047, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -246.64401245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6894279718399048, + "rewards_train/margins": 7.374973654747009, + "rewards_train/rejected": -9.064401626586914, + "step": 545 + }, + { + "epoch": 0.15, + "learning_rate": 1.848903721743751e-06, + "loss": 0.4061, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -72.31492614746094, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -155.87155151367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0064926147460938, + "rewards_train/margins": 3.580662727355957, + "rewards_train/rejected": -4.587155342102051, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.380413055419922, + "logps_train/ref_chosen": -2.71875, + "logps_train/ref_rejected": -0.392578125, + "logps_train/rejected": -0.4737635850906372, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06616630405187607, + "rewards_train/margins": -0.0580477574840188, + "rewards_train/rejected": -0.008118546567857265, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -114.99539184570312, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -45.9122314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4495391845703125, + "rewards_train/margins": -0.9083160161972046, + "rewards_train/rejected": -0.5412231683731079, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -113.51123809814453, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -103.63743591308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.801123857498169, + "rewards_train/margins": -0.18738019466400146, + "rewards_train/rejected": -1.6137436628341675, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -89.1741714477539, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -137.00686645507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5674171447753906, + "rewards_train/margins": 1.3332695960998535, + "rewards_train/rejected": -2.900686740875244, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -5.043765068054199, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -16.410072326660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16843901574611664, + "rewards_train/margins": -0.33993178606033325, + "rewards_train/rejected": 0.1714927703142166, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -39.51215744018555, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -38.425018310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8512157797813416, + "rewards_train/margins": 1.4600360989570618, + "rewards_train/rejected": -2.3112518787384033, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -109.10739135742188, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -211.45089721679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.360739141702652, + "rewards_train/margins": 4.8843508660793304, + "rewards_train/rejected": -5.245090007781982, + "step": 546 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.9524853229522705, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -5.0753631591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009438968263566494, + "rewards_train/margins": 0.14666279312223196, + "rewards_train/rejected": -0.13722382485866547, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -119.0826644897461, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -126.4688720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.858266592025757, + "rewards_train/margins": 0.2886207103729248, + "rewards_train/rejected": -3.1468873023986816, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -48.93410873413086, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -42.63047790527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3434108793735504, + "rewards_train/margins": 1.8321369588375092, + "rewards_train/rejected": -2.1755478382110596, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -148.08050537109375, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -166.93789672851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0580506324768066, + "rewards_train/margins": 1.9357390403747559, + "rewards_train/rejected": -3.9937896728515625, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -99.06468200683594, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -135.3685760498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.906468152999878, + "rewards_train/margins": 1.1303894519805908, + "rewards_train/rejected": -4.036857604980469, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -32.71798324584961, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -34.06317138671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7467983365058899, + "rewards_train/margins": -0.19048118591308594, + "rewards_train/rejected": -0.556317150592804, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -126.22239685058594, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -120.38275146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9222397804260254, + "rewards_train/margins": 0.11603546142578125, + "rewards_train/rejected": -3.0382752418518066, + "step": 547 + }, + { + "epoch": 0.15, + "logps_train/chosen": -176.16647338867188, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -153.0778350830078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08335266262292862, + "rewards_train/margins": -0.7088638171553612, + "rewards_train/rejected": 0.7922164797782898, + "step": 547 + }, + { + "epoch": 0.15, + "learning_rate": 1.8475024528916499e-06, + "loss": 0.5249, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -158.18426513671875, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -139.1317901611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.418426513671875, + "rewards_train/margins": 2.1947524547576904, + "rewards_train/rejected": -2.6131789684295654, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -65.92668151855469, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -50.6158447265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8176681399345398, + "rewards_train/margins": -0.23108363151550293, + "rewards_train/rejected": -0.5865845084190369, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -7.565524578094482, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -13.655582427978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20967745780944824, + "rewards_train/margins": 0.49963080883026123, + "rewards_train/rejected": -0.7093082666397095, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -80.77661895751953, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -181.40011596679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7026618719100952, + "rewards_train/margins": 2.037349820137024, + "rewards_train/rejected": -3.740011692047119, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -3.2317867279052734, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -0.2490234375, + "logps_train/rejected": -0.10067804157733917, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.040883827954530716, + "rewards_train/margins": 0.026049287989735603, + "rewards_train/rejected": 0.014834539964795113, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -29.15308380126953, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -42.44453048706055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.60280841588974, + "rewards_train/margins": 0.6416446566581726, + "rewards_train/rejected": -1.2444530725479126, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -18.284290313720703, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -14.696348190307617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12157096713781357, + "rewards_train/margins": 0.8380807861685753, + "rewards_train/rejected": -0.7165098190307617, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -119.15333557128906, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -167.92767333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2153337001800537, + "rewards_train/margins": 0.37743377685546875, + "rewards_train/rejected": -2.5927674770355225, + "step": 548 + }, + { + "epoch": 0.15, + "logps_train/chosen": -132.25198364257812, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -226.25430297851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.025198459625244, + "rewards_train/margins": 4.80023193359375, + "rewards_train/rejected": -6.825430393218994, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -55.951725006103516, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -117.3621826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6298274993896484, + "rewards_train/margins": 4.166045904159546, + "rewards_train/rejected": -3.5362184047698975, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -9.124763488769531, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -13.196646690368652, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40622636675834656, + "rewards_train/margins": 0.19156333804130554, + "rewards_train/rejected": -0.5977897047996521, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -78.89353942871094, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -88.78025817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3893539607524872, + "rewards_train/margins": 1.5886718332767487, + "rewards_train/rejected": -1.9780257940292358, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -107.31119537353516, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -110.40899658203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7811195850372314, + "rewards_train/margins": -0.6402199268341064, + "rewards_train/rejected": -1.140899658203125, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -145.6678466796875, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -149.1541290283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.51678466796875, + "rewards_train/margins": 0.2486283779144287, + "rewards_train/rejected": -2.7654130458831787, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.756361961364746, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -5.01204252243042, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01811380498111248, + "rewards_train/margins": 0.14588056318461895, + "rewards_train/rejected": -0.12776675820350647, + "step": 549 + }, + { + "epoch": 0.15, + "logps_train/chosen": -16.052490234375, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -15.908177375793457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5364990234375, + "rewards_train/margins": -0.06443127989768982, + "rewards_train/rejected": -0.4720677435398102, + "step": 549 + }, + { + "epoch": 0.15, + "learning_rate": 1.8460952524209353e-06, + "loss": 0.4562, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -377.9046630859375, + "logps_train/ref_chosen": -318.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -148.57269287109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.990466594696045, + "rewards_train/margins": -1.933197021484375, + "rewards_train/rejected": -4.05726957321167, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -1.6695939302444458, + "logps_train/ref_chosen": -0.8203125, + "logps_train/ref_rejected": -0.8203125, + "logps_train/rejected": -1.6536049842834473, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08492814749479294, + "rewards_train/margins": -0.0015988945960998535, + "rewards_train/rejected": -0.08332925289869308, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -145.69863891601562, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -131.92599487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.869863986968994, + "rewards_train/margins": 0.07273554801940918, + "rewards_train/rejected": -2.9425995349884033, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -24.94778060913086, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -13.951681137084961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24477806687355042, + "rewards_train/margins": 0.6878900825977325, + "rewards_train/rejected": -0.932668149471283, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -111.91191864013672, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -175.45510864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.241191864013672, + "rewards_train/margins": 4.954319000244141, + "rewards_train/rejected": -7.1955108642578125, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -146.42787170410156, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -157.50537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.24278724193573, + "rewards_train/margins": 0.7077498435974121, + "rewards_train/rejected": -1.950537085533142, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -20.19959259033203, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -82.32243347167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38245925307273865, + "rewards_train/margins": 0.44978412985801697, + "rewards_train/rejected": -0.8322433829307556, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.406982421875, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -6.8010573387146, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.42180177569389343, + "rewards_train/margins": 0.5987825095653534, + "rewards_train/rejected": -0.17698073387145996, + "step": 550 + }, + { + "epoch": 0.15, + "logps_train/chosen": -59.41557312011719, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -32.469364166259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8415573835372925, + "rewards_train/margins": -1.144620954990387, + "rewards_train/rejected": -0.6969364285469055, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -83.59976959228516, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -190.37644958496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3099769651889801, + "rewards_train/margins": 6.977667897939682, + "rewards_train/rejected": -7.287644863128662, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -172.47866821289062, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -222.19369506835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.347867012023926, + "rewards_train/margins": 4.371502876281738, + "rewards_train/rejected": -8.719369888305664, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -124.07025909423828, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -123.793701171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2070258855819702, + "rewards_train/margins": -0.22765576839447021, + "rewards_train/rejected": -0.9793701171875, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -1.9315928220748901, + "logps_train/ref_chosen": -2.234375, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -12.41567325592041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.030278218910098076, + "rewards_train/margins": 0.5030955504626036, + "rewards_train/rejected": -0.4728173315525055, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -83.89787292480469, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -118.85713958740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21021270751953125, + "rewards_train/margins": 0.9459266662597656, + "rewards_train/rejected": -0.7357139587402344, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -94.66162109375, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -50.5, + "logps_train/rejected": -67.97209167480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2161622047424316, + "rewards_train/margins": -0.468953013420105, + "rewards_train/rejected": -1.7472091913223267, + "step": 551 + }, + { + "epoch": 0.15, + "logps_train/chosen": -62.63045883178711, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -65.67568969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4630458950996399, + "rewards_train/margins": 0.45452308654785156, + "rewards_train/rejected": -0.9175689816474915, + "step": 551 + }, + { + "epoch": 0.15, + "learning_rate": 1.8446821301805187e-06, + "loss": 0.6035, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -24.388427734375, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -10.096237182617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4638427793979645, + "rewards_train/margins": -0.09171906113624573, + "rewards_train/rejected": -0.37212371826171875, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -17.146989822387695, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -11.519586563110352, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.477198988199234, + "rewards_train/margins": 0.024759680032730103, + "rewards_train/rejected": -0.5019586682319641, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -0.5217899084091187, + "logps_train/ref_chosen": -0.494140625, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -8.358637809753418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002764928387477994, + "rewards_train/margins": 0.18934885854832828, + "rewards_train/rejected": -0.19211378693580627, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -21.377304077148438, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -8.77932357788086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7127304077148438, + "rewards_train/margins": -0.6222980469465256, + "rewards_train/rejected": -0.09043236076831818, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -44.46208953857422, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -64.7054443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17879104614257812, + "rewards_train/margins": 0.49933549761772156, + "rewards_train/rejected": -0.32054445147514343, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -119.04110717773438, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -106.03038787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5458893179893494, + "rewards_train/margins": 1.94892817735672, + "rewards_train/rejected": -1.4030388593673706, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -11.954010963439941, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -14.594921112060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0641511008143425, + "rewards_train/margins": 0.754716046154499, + "rewards_train/rejected": -0.8188671469688416, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -63.44368362426758, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -119.22187805175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2443684339523315, + "rewards_train/margins": 0.3778194189071655, + "rewards_train/rejected": -1.622187852859497, + "step": 552 + }, + { + "epoch": 0.15, + "logps_train/chosen": -14.340154647827148, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -9.591588973999023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.45901545882225037, + "rewards_train/margins": -0.19985654950141907, + "rewards_train/rejected": -0.2591589093208313, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -14.07642650604248, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -6.923969745635986, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.282642662525177, + "rewards_train/margins": -0.08712068200111389, + "rewards_train/rejected": -0.1955219805240631, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -59.747596740722656, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -88.02899169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.050240326672792435, + "rewards_train/margins": 1.5031395442783833, + "rewards_train/rejected": -1.4528992176055908, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -113.42146301269531, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -180.16297912597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5421463251113892, + "rewards_train/margins": 4.074151873588562, + "rewards_train/rejected": -5.616298198699951, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -36.06840515136719, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -55.931785583496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03184051439166069, + "rewards_train/margins": 1.2863380201160908, + "rewards_train/rejected": -1.3181785345077515, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -114.52153778076172, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -141.18072509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9521538019180298, + "rewards_train/margins": 2.165918707847595, + "rewards_train/rejected": -4.118072509765625, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -130.06773376464844, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -187.65931701660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3567733764648438, + "rewards_train/margins": 4.409158229827881, + "rewards_train/rejected": -5.765931606292725, + "step": 553 + }, + { + "epoch": 0.15, + "logps_train/chosen": -2.6609010696411133, + "logps_train/ref_chosen": -1.921875, + "logps_train/ref_rejected": -1.2265625, + "logps_train/rejected": -0.7816746830940247, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07390260696411133, + "rewards_train/margins": -0.11839139088988304, + "rewards_train/rejected": 0.04448878392577171, + "step": 553 + }, + { + "epoch": 0.15, + "learning_rate": 1.8432630960607572e-06, + "loss": 0.4664, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -8.761942863464355, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -11.22966194152832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28556928038597107, + "rewards_train/margins": 0.13427191972732544, + "rewards_train/rejected": -0.4198412001132965, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -94.40320587158203, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -189.14947509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.059679415076971054, + "rewards_train/margins": 5.074626829475164, + "rewards_train/rejected": -5.014947414398193, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -16.114940643310547, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -15.519475936889648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1010059341788292, + "rewards_train/margins": 0.7404535636305809, + "rewards_train/rejected": -0.6394476294517517, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -4.410114288330078, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -15.48442554473877, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2144489288330078, + "rewards_train/margins": 0.26524361968040466, + "rewards_train/rejected": -0.4796925485134125, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -74.19176483154297, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -73.84776306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5308235287666321, + "rewards_train/margins": 1.0155998468399048, + "rewards_train/rejected": -0.4847763180732727, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -95.89326477050781, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -50.606834411621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6393265128135681, + "rewards_train/margins": -0.6536430716514587, + "rewards_train/rejected": 0.014316558837890625, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -19.372833251953125, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -9.826154708862305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9310333132743835, + "rewards_train/margins": -0.992167841643095, + "rewards_train/rejected": 0.06113452836871147, + "step": 554 + }, + { + "epoch": 0.15, + "logps_train/chosen": -24.742313385009766, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -16.594707489013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3492313325405121, + "rewards_train/margins": 0.09773942828178406, + "rewards_train/rejected": -0.44697076082229614, + "step": 554 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.332581996917725, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -18.281421661376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31674179434776306, + "rewards_train/margins": 0.7261339724063873, + "rewards_train/rejected": -0.40939217805862427, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -131.75636291503906, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -131.75111389160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.32436370849609375, + "rewards_train/margins": -0.0005249083042144775, + "rewards_train/rejected": 0.3248886168003082, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -5.438468933105469, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -4.265406608581543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11259689182043076, + "rewards_train/margins": 0.04519376903772354, + "rewards_train/rejected": -0.1577906608581543, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -122.9176025390625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -175.20986938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8917603492736816, + "rewards_train/margins": 4.6292266845703125, + "rewards_train/rejected": -7.520987033843994, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -30.793197631835938, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -28.749866485595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2668197751045227, + "rewards_train/margins": 0.5581668615341187, + "rewards_train/rejected": -0.8249866366386414, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -122.4884033203125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -128.6194305419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1488404273986816, + "rewards_train/margins": 0.4131026268005371, + "rewards_train/rejected": -2.5619430541992188, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -123.17361450195312, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -145.90597534179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6173614263534546, + "rewards_train/margins": 0.27323615550994873, + "rewards_train/rejected": -1.8905975818634033, + "step": 555 + }, + { + "epoch": 0.16, + "logps_train/chosen": -13.997398376464844, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -15.375916481018066, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.068489909172058, + "rewards_train/margins": -0.09964823722839355, + "rewards_train/rejected": -0.9688416719436646, + "step": 555 + }, + { + "epoch": 0.16, + "learning_rate": 1.8418381599933851e-06, + "loss": 0.5605, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -103.08228302001953, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -159.80300903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6582283973693848, + "rewards_train/margins": 2.122072696685791, + "rewards_train/rejected": -4.780301094055176, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -108.15447235107422, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -172.966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.065447211265564, + "rewards_train/margins": 2.731232523918152, + "rewards_train/rejected": -3.796679735183716, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -14.78962516784668, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -16.275161743164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12103748321533203, + "rewards_train/margins": 0.04855365306138992, + "rewards_train/rejected": 0.07248383015394211, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -109.51187133789062, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -217.21554565429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5511871576309204, + "rewards_train/margins": 5.770367503166199, + "rewards_train/rejected": -6.321554660797119, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -19.203571319580078, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -19.318254470825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029642868787050247, + "rewards_train/margins": 0.06146831810474396, + "rewards_train/rejected": -0.03182544931769371, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -89.06452941894531, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -198.45314025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2564529180526733, + "rewards_train/margins": 6.088861107826233, + "rewards_train/rejected": -7.345314025878906, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -69.6512451171875, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -146.87274169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5848755240440369, + "rewards_train/margins": 1.0721496939659119, + "rewards_train/rejected": -0.487274169921875, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -14.798238754272461, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -3.228957176208496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.032676126807928085, + "rewards_train/margins": 0.04307184461504221, + "rewards_train/rejected": -0.010395717807114124, + "step": 556 + }, + { + "epoch": 0.16, + "logps_train/chosen": -204.34400939941406, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -219.4072723388672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.734401226043701, + "rewards_train/margins": -0.8936738967895508, + "rewards_train/rejected": -5.84072732925415, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -152.7465362548828, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -175.31842041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9746536016464233, + "rewards_train/margins": 3.10718834400177, + "rewards_train/rejected": -5.081841945648193, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -15.449579238891602, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -36.52276611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013707923702895641, + "rewards_train/margins": 0.2635686816647649, + "rewards_train/rejected": -0.2772766053676605, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -9.71849536895752, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -26.677322387695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009349537082016468, + "rewards_train/margins": 0.6333827255293727, + "rewards_train/rejected": -0.6427322626113892, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -60.042015075683594, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -102.44483184814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1042015552520752, + "rewards_train/margins": 0.29028165340423584, + "rewards_train/rejected": -1.394483208656311, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -3.432199001312256, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -4.146104335784912, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016657400876283646, + "rewards_train/margins": 0.06201553717255592, + "rewards_train/rejected": -0.07867293804883957, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -217.13198852539062, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -207.52999877929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.613198757171631, + "rewards_train/margins": -0.7601988315582275, + "rewards_train/rejected": -3.8529999256134033, + "step": 557 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.608002662658691, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -10.813950538635254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20455026626586914, + "rewards_train/margins": 0.014344796538352966, + "rewards_train/rejected": -0.2188950628042221, + "step": 557 + }, + { + "epoch": 0.16, + "learning_rate": 1.8404073319514444e-06, + "loss": 0.4881, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -142.65574645996094, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -92.25208282470703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5655746459960938, + "rewards_train/margins": -1.6403663754463196, + "rewards_train/rejected": -0.9252082705497742, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -113.82459259033203, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -164.2486572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.017540741711854935, + "rewards_train/margins": 4.592406559735537, + "rewards_train/rejected": -4.574865818023682, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -114.64259338378906, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -101.06037902832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6642593741416931, + "rewards_train/margins": 1.341778576374054, + "rewards_train/rejected": -2.006037950515747, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -9.882654190063477, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -13.44338321685791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2429845780134201, + "rewards_train/margins": 0.5623228996992111, + "rewards_train/rejected": -0.319338321685791, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.3866939544677734, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -8.400777816772461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.053518105298280716, + "rewards_train/margins": 0.31234588101506233, + "rewards_train/rejected": -0.2588277757167816, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -76.151123046875, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -75.56163787841797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16511230170726776, + "rewards_train/margins": -0.05894850939512253, + "rewards_train/rejected": -0.10616379231214523, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -139.7184600830078, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -141.98150634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1718461513519287, + "rewards_train/margins": 1.226304531097412, + "rewards_train/rejected": -3.398150682449341, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -125.46577453613281, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -125.8739242553711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9465774297714233, + "rewards_train/margins": 0.1408151388168335, + "rewards_train/rejected": -2.087392568588257, + "step": 558 + }, + { + "epoch": 0.16, + "logps_train/chosen": -162.7167510986328, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -205.4254150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8716752529144287, + "rewards_train/margins": 2.9708664417266846, + "rewards_train/rejected": -5.842541694641113, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -15.87417221069336, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -104.56385803222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3561672270298004, + "rewards_train/margins": 1.300218552350998, + "rewards_train/rejected": -1.6563857793807983, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -39.1217041015625, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -37.165260314941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6371704339981079, + "rewards_train/margins": 0.24185562133789062, + "rewards_train/rejected": -0.8790260553359985, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.335471153259277, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -8.756577491760254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038327883929014206, + "rewards_train/margins": 0.2827356420457363, + "rewards_train/rejected": -0.2444077581167221, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.813596725463867, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -25.938072204589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10010967403650284, + "rewards_train/margins": 0.393697552382946, + "rewards_train/rejected": -0.49380722641944885, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -113.6441879272461, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -95.82162475585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5644187927246094, + "rewards_train/margins": 0.4177436828613281, + "rewards_train/rejected": -1.9821624755859375, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -87.98133087158203, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -127.67628479003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6481330990791321, + "rewards_train/margins": 1.6694955229759216, + "rewards_train/rejected": -2.3176286220550537, + "step": 559 + }, + { + "epoch": 0.16, + "logps_train/chosen": -153.96685791015625, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -196.9571990966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2966859340667725, + "rewards_train/margins": 3.7490341663360596, + "rewards_train/rejected": -7.045720100402832, + "step": 559 + }, + { + "epoch": 0.16, + "learning_rate": 1.8389706219492145e-06, + "loss": 0.4569, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -14.297906875610352, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -23.259572982788086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9610406756401062, + "rewards_train/margins": -0.0975833535194397, + "rewards_train/rejected": -0.8634573221206665, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -26.572433471679688, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -38.24184799194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2009934186935425, + "rewards_train/margins": 0.6231913566589355, + "rewards_train/rejected": -1.824184775352478, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -97.88695526123047, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -94.62525939941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.038695525377988815, + "rewards_train/margins": 1.7738304622471333, + "rewards_train/rejected": -1.812525987625122, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -158.62445068359375, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -157.75411987304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4624451398849487, + "rewards_train/margins": -0.08703315258026123, + "rewards_train/rejected": -1.3754119873046875, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -179.62747192382812, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -224.6670684814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.312747478485107, + "rewards_train/margins": 4.65395975112915, + "rewards_train/rejected": -10.966707229614258, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -128.41387939453125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -128.22296142578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.091387987136841, + "rewards_train/margins": -0.01909184455871582, + "rewards_train/rejected": -3.072296142578125, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.379496097564697, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -9.629205703735352, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2120503932237625, + "rewards_train/margins": 0.33747096359729767, + "rewards_train/rejected": -0.12542057037353516, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.2363944053649902, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -9.053351402282715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.049798060208559036, + "rewards_train/margins": 0.467633206397295, + "rewards_train/rejected": -0.41783514618873596, + "step": 560 + }, + { + "epoch": 0.16, + "logps_train/chosen": -134.81210327148438, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -203.51235961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6812103986740112, + "rewards_train/margins": 2.270025610923767, + "rewards_train/rejected": -3.9512360095977783, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -3.374786376953125, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -4.781757354736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12185364216566086, + "rewards_train/margins": 0.11569709330797195, + "rewards_train/rejected": -0.2375507354736328, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -1.871507167816162, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -14.463561058044434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06441178172826767, + "rewards_train/margins": 0.742017887532711, + "rewards_train/rejected": -0.6776061058044434, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -3.556718111038208, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -22.403078079223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03379681333899498, + "rewards_train/margins": 1.0252610184252262, + "rewards_train/rejected": -1.0590578317642212, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.021462440490723, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -8.279500007629395, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18339625000953674, + "rewards_train/margins": -0.07732124626636505, + "rewards_train/rejected": -0.10607500374317169, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -67.28079223632812, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -85.37779235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2530792951583862, + "rewards_train/margins": 1.9346998929977417, + "rewards_train/rejected": -3.187779188156128, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.684248447418213, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -19.006816864013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16595016419887543, + "rewards_train/margins": 0.5916318446397781, + "rewards_train/rejected": -0.4256816804409027, + "step": 561 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.77861213684082, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -10.78598690032959, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5028612017631531, + "rewards_train/margins": -0.11801251769065857, + "rewards_train/rejected": -0.3848486840724945, + "step": 561 + }, + { + "epoch": 0.16, + "learning_rate": 1.8375280400421418e-06, + "loss": 0.4561, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -59.31516647338867, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -51.554012298583984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.018483353778719902, + "rewards_train/margins": -0.17611542530357838, + "rewards_train/rejected": 0.19459877908229828, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -21.26544952392578, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -19.40703010559082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41404494643211365, + "rewards_train/margins": 0.29540809988975525, + "rewards_train/rejected": -0.7094530463218689, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -24.364044189453125, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -12.558145523071289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0426543951034546, + "rewards_train/margins": -0.13840234279632568, + "rewards_train/rejected": -0.9042520523071289, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -25.345903396606445, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -17.767385482788086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47209033370018005, + "rewards_train/margins": 0.5608982145786285, + "rewards_train/rejected": -1.0329885482788086, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -167.64077758789062, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -180.9622344970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8640778064727783, + "rewards_train/margins": 2.882145643234253, + "rewards_train/rejected": -6.746223449707031, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -204.02792358398438, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -163.65682983398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.0027923583984375, + "rewards_train/margins": 0.8128905296325684, + "rewards_train/rejected": -5.815682888031006, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -26.742046356201172, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -39.85395812988281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4367046356201172, + "rewards_train/margins": -0.5513088256120682, + "rewards_train/rejected": 0.11460418999195099, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -7.997229099273682, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -13.508468627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025277091190218925, + "rewards_train/margins": 0.3511239718645811, + "rewards_train/rejected": -0.3258468806743622, + "step": 562 + }, + { + "epoch": 0.16, + "logps_train/chosen": -41.605281829833984, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -18.016868591308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26052817702293396, + "rewards_train/margins": 0.6099086701869965, + "rewards_train/rejected": -0.8704368472099304, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.397186756134033, + "logps_train/ref_chosen": -1.53125, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -8.037041664123535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08659368008375168, + "rewards_train/margins": 0.13898549228906631, + "rewards_train/rejected": -0.225579172372818, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -131.83694458007812, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -123.63324737548828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.033694744110107, + "rewards_train/margins": -0.8703699111938477, + "rewards_train/rejected": -3.1633248329162598, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -17.213476181030273, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -13.913263320922852, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5588476061820984, + "rewards_train/margins": 0.22935372591018677, + "rewards_train/rejected": -0.7882013320922852, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -11.745872497558594, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -52.84367752075195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34958726167678833, + "rewards_train/margins": 0.25978052616119385, + "rewards_train/rejected": -0.6093677878379822, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -157.8843536376953, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -183.76611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3884353637695312, + "rewards_train/margins": 3.188176155090332, + "rewards_train/rejected": -5.576611518859863, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -18.016448974609375, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -27.35506820678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27664491534233093, + "rewards_train/margins": 0.6838619410991669, + "rewards_train/rejected": -0.9605068564414978, + "step": 563 + }, + { + "epoch": 0.16, + "logps_train/chosen": -242.77090454101562, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -216.16586303710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.477090358734131, + "rewards_train/margins": 1.4894957542419434, + "rewards_train/rejected": -8.966586112976074, + "step": 563 + }, + { + "epoch": 0.16, + "learning_rate": 1.83607959632677e-06, + "loss": 0.5378, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -190.8590087890625, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -213.2342987060547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.535901069641113, + "rewards_train/margins": -0.8124709129333496, + "rewards_train/rejected": -5.723430156707764, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -157.25860595703125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -184.5452117919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1258606910705566, + "rewards_train/margins": 4.478660583496094, + "rewards_train/rejected": -6.60452127456665, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -69.7332992553711, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -99.2681884765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8983299136161804, + "rewards_train/margins": -0.12151104211807251, + "rewards_train/rejected": -0.7768188714981079, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -222.11764526367188, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -209.94815063476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.011764526367188, + "rewards_train/margins": 0.9330511093139648, + "rewards_train/rejected": -8.944815635681152, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -175.51939392089844, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -165.88101196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.951939344406128, + "rewards_train/margins": 0.2361619472503662, + "rewards_train/rejected": -3.188101291656494, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -19.248903274536133, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -15.187044143676758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07489033043384552, + "rewards_train/margins": 0.09381408989429474, + "rewards_train/rejected": -0.16870442032814026, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -196.986328125, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -202.0, + "logps_train/rejected": -290.6371154785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.698632717132568, + "rewards_train/margins": 4.165078639984131, + "rewards_train/rejected": -8.8637113571167, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -30.745914459228516, + "logps_train/ref_chosen": -30.375, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -91.95582580566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0370914451777935, + "rewards_train/margins": 0.6084911711513996, + "rewards_train/rejected": -0.6455826163291931, + "step": 564 + }, + { + "epoch": 0.16, + "logps_train/chosen": -1.8308219909667969, + "logps_train/ref_chosen": -1.3125, + "logps_train/ref_rejected": -1.078125, + "logps_train/rejected": -1.6327223777770996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05183219909667969, + "rewards_train/margins": 0.0036275386810302734, + "rewards_train/rejected": -0.05545973777770996, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -106.07292175292969, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -94.19940185546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8072922229766846, + "rewards_train/margins": -0.13735198974609375, + "rewards_train/rejected": -1.6699402332305908, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -70.64248657226562, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -70.5790023803711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.48575136065483093, + "rewards_train/margins": -0.006348401308059692, + "rewards_train/rejected": 0.4920997619628906, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -134.31764221191406, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -201.3011474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0817642211914062, + "rewards_train/margins": 1.648350477218628, + "rewards_train/rejected": -3.730114698410034, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -79.86290740966797, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -117.31047058105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7362907528877258, + "rewards_train/margins": 1.1447563767433167, + "rewards_train/rejected": -1.8810471296310425, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -116.80821990966797, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -153.52566528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8808220028877258, + "rewards_train/margins": 3.1717445254325867, + "rewards_train/rejected": -4.0525665283203125, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -141.63180541992188, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -135.73397827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.863180637359619, + "rewards_train/margins": 2.2102174758911133, + "rewards_train/rejected": -5.073398113250732, + "step": 565 + }, + { + "epoch": 0.16, + "logps_train/chosen": -53.65886688232422, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -30.137170791625977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4908866882324219, + "rewards_train/margins": -0.18966960906982422, + "rewards_train/rejected": -0.30121707916259766, + "step": 565 + }, + { + "epoch": 0.16, + "learning_rate": 1.83462530094067e-06, + "loss": 0.4684, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -11.581528663635254, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -5.418806552886963, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09809713810682297, + "rewards_train/margins": 0.2899777963757515, + "rewards_train/rejected": -0.19188065826892853, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -137.9551544189453, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -233.77597045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.545515537261963, + "rewards_train/margins": 2.8320817947387695, + "rewards_train/rejected": -7.377597332000732, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -112.91331481933594, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -141.94802856445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5913314819335938, + "rewards_train/margins": 2.303471326828003, + "rewards_train/rejected": -2.8948028087615967, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -69.82788848876953, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -103.44224548339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38278886675834656, + "rewards_train/margins": 2.1614356338977814, + "rewards_train/rejected": -2.544224500656128, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -79.0103530883789, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -84.56306457519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15103530883789062, + "rewards_train/margins": 0.10527116060256958, + "rewards_train/rejected": -0.2563064694404602, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -32.444244384765625, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -4.1214985847473145, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24442444741725922, + "rewards_train/margins": -0.23539958894252777, + "rewards_train/rejected": -0.009024858474731445, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -88.98383331298828, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -75.49319458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9983833432197571, + "rewards_train/margins": 1.5509361624717712, + "rewards_train/rejected": -2.5493195056915283, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -73.73934936523438, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -9.827280044555664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7239349484443665, + "rewards_train/margins": -0.5787069350481033, + "rewards_train/rejected": -0.14522801339626312, + "step": 566 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.4435842037200928, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -10.07104778289795, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04314158111810684, + "rewards_train/margins": 0.26899636536836624, + "rewards_train/rejected": -0.2258547842502594, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -12.972999572753906, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -34.68968200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4027000367641449, + "rewards_train/margins": 1.6841683089733124, + "rewards_train/rejected": -1.2814682722091675, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -77.48725128173828, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -116.17121887207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1987251043319702, + "rewards_train/margins": 0.018396854400634766, + "rewards_train/rejected": -1.217121958732605, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -84.72628784179688, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -69.14667510986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2273712158203125, + "rewards_train/margins": 1.0920387506484985, + "rewards_train/rejected": -0.864667534828186, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -128.92811584472656, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -139.943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.042811632156372, + "rewards_train/margins": 1.5515244007110596, + "rewards_train/rejected": -2.5943360328674316, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.396841049194336, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -61.272586822509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4412465989589691, + "rewards_train/margins": 0.5610120594501495, + "rewards_train/rejected": -1.0022586584091187, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -185.44009399414062, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -196.10633850097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.244009494781494, + "rewards_train/margins": 0.9166245460510254, + "rewards_train/rejected": -7.1606340408325195, + "step": 567 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.33449649810791, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -24.00959014892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23657464981079102, + "rewards_train/margins": 0.20188435912132263, + "rewards_train/rejected": -0.43845900893211365, + "step": 567 + }, + { + "epoch": 0.16, + "learning_rate": 1.8331651640623672e-06, + "loss": 0.4243, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -32.16226577758789, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -23.399066925048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1412266492843628, + "rewards_train/margins": 0.08618009090423584, + "rewards_train/rejected": -1.2274067401885986, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -16.536014556884766, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -2.28125, + "logps_train/rejected": -12.677833557128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18485145270824432, + "rewards_train/margins": 0.85480697453022, + "rewards_train/rejected": -1.0396584272384644, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -41.71875, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -35.33454895019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.846875011920929, + "rewards_train/margins": 0.0990799069404602, + "rewards_train/rejected": -0.9459549188613892, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -90.78601837158203, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -252.28677368164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2286019325256348, + "rewards_train/margins": 8.600075244903564, + "rewards_train/rejected": -10.8286771774292, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -152.55764770507812, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -172.57386779785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.055764675140381, + "rewards_train/margins": 1.5516223907470703, + "rewards_train/rejected": -5.607387065887451, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -13.594443321228027, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -28.530338287353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9797568321228027, + "rewards_train/margins": 0.11077702045440674, + "rewards_train/rejected": -1.0905338525772095, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -61.91081619262695, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -38.898582458496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.30891838669776917, + "rewards_train/margins": 0.8237766325473785, + "rewards_train/rejected": -0.5148582458496094, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -39.842491149902344, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -64.74836730957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21575088798999786, + "rewards_train/margins": 0.2905876189470291, + "rewards_train/rejected": -0.07483673095703125, + "step": 568 + }, + { + "epoch": 0.16, + "logps_train/chosen": -118.0211181640625, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -152.93699645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.452111840248108, + "rewards_train/margins": 2.941587805747986, + "rewards_train/rejected": -4.393699645996094, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -75.57294464111328, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -140.96905517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2572945356369019, + "rewards_train/margins": 2.739611029624939, + "rewards_train/rejected": -3.996905565261841, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -125.5997543334961, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -88.35891723632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5099754333496094, + "rewards_train/margins": -0.27408361434936523, + "rewards_train/rejected": -3.235891819000244, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.565257549285889, + "logps_train/ref_chosen": -2.1875, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -12.509857177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43777576088905334, + "rewards_train/margins": 0.347584992647171, + "rewards_train/rejected": -0.7853607535362244, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -82.248291015625, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -148.2002716064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3248291015625, + "rewards_train/margins": 5.395198345184326, + "rewards_train/rejected": -5.720027446746826, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -52.2841911315918, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -21.457027435302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5034191012382507, + "rewards_train/margins": 0.8735336661338806, + "rewards_train/rejected": -1.3769527673721313, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -79.24358367919922, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -31.5, + "logps_train/rejected": -46.68250274658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3743583858013153, + "rewards_train/margins": 1.1438919603824615, + "rewards_train/rejected": -1.5182503461837769, + "step": 569 + }, + { + "epoch": 0.16, + "logps_train/chosen": -129.1455078125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -78.86002349853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0145509243011475, + "rewards_train/margins": 0.24645137786865234, + "rewards_train/rejected": -2.2610023021698, + "step": 569 + }, + { + "epoch": 0.16, + "learning_rate": 1.8316991959112716e-06, + "loss": 0.3813, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -33.62929153442383, + "logps_train/ref_chosen": -31.625, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -33.127567291259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20042915642261505, + "rewards_train/margins": 0.4123275727033615, + "rewards_train/rejected": -0.6127567291259766, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.487013101577759, + "logps_train/ref_chosen": -0.80078125, + "logps_train/ref_rejected": -0.80078125, + "logps_train/rejected": -2.465853214263916, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1686231940984726, + "rewards_train/margins": -0.002115994691848755, + "rewards_train/rejected": -0.16650719940662384, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -12.563124656677246, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -17.000730514526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32506248354911804, + "rewards_train/margins": 0.05626058578491211, + "rewards_train/rejected": -0.38132306933403015, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -118.55106353759766, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -103.431640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4551063776016235, + "rewards_train/margins": -1.01194229722023, + "rewards_train/rejected": -0.44316408038139343, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -13.73300552368164, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -20.622583389282227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029550552368164062, + "rewards_train/margins": 0.4327077865600586, + "rewards_train/rejected": -0.46225833892822266, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -167.8831787109375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -212.057373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.288318157196045, + "rewards_train/margins": 4.617419719696045, + "rewards_train/rejected": -8.90573787689209, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -140.0176544189453, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -148.65863037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.451765537261963, + "rewards_train/margins": 2.464097499847412, + "rewards_train/rejected": -5.915863037109375, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -136.86370849609375, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -153.56228637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.936370849609375, + "rewards_train/margins": 1.1698579788208008, + "rewards_train/rejected": -4.106228828430176, + "step": 570 + }, + { + "epoch": 0.16, + "logps_train/chosen": -123.0980224609375, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -171.70806884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.809802234172821, + "rewards_train/margins": 5.711004555225372, + "rewards_train/rejected": -6.520806789398193, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.429039001464844, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -1.9375, + "logps_train/rejected": -3.897599220275879, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.27102890610694885, + "rewards_train/margins": -0.0750189870595932, + "rewards_train/rejected": -0.19600991904735565, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -42.685508728027344, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -35.26816177368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1185508742928505, + "rewards_train/margins": 0.8832652792334557, + "rewards_train/rejected": -1.0018161535263062, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -18.39696502685547, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -99.43115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5584465265274048, + "rewards_train/margins": 0.584668755531311, + "rewards_train/rejected": -1.1431152820587158, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.489348411560059, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -31.371841430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14893484115600586, + "rewards_train/margins": 0.7132493257522583, + "rewards_train/rejected": -0.8621841669082642, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -5.623614311218262, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -57.357784271240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3139239251613617, + "rewards_train/margins": 0.7968545258045197, + "rewards_train/rejected": -1.1107784509658813, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -50.34407043457031, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -106.04633331298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06559295952320099, + "rewards_train/margins": 3.5702263861894608, + "rewards_train/rejected": -3.5046334266662598, + "step": 571 + }, + { + "epoch": 0.16, + "logps_train/chosen": -55.39945602416992, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -100.41482543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28505441546440125, + "rewards_train/margins": 3.1765370070934296, + "rewards_train/rejected": -2.8914825916290283, + "step": 571 + }, + { + "epoch": 0.16, + "learning_rate": 1.8302274067476063e-06, + "loss": 0.4005, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -88.05928039550781, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -109.10921478271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3059280514717102, + "rewards_train/margins": 1.7049934267997742, + "rewards_train/rejected": -2.0109214782714844, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -67.48262023925781, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -110.68101501464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1517379730939865, + "rewards_train/margins": 1.1198394745588303, + "rewards_train/rejected": -0.9681015014648438, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -0.3436325192451477, + "logps_train/ref_chosen": -0.33984375, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -2.8718292713165283, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0003788769245147705, + "rewards_train/margins": 0.01649155095219612, + "rewards_train/rejected": -0.01687042787671089, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -170.91038513183594, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -165.85107421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.541038513183594, + "rewards_train/margins": -1.1059308052062988, + "rewards_train/rejected": -4.435107707977295, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -100.01556396484375, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -154.11050415039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9515564441680908, + "rewards_train/margins": 2.909494161605835, + "rewards_train/rejected": -4.861050605773926, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -1.5419999361038208, + "logps_train/ref_chosen": -1.375, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -9.342667579650879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01669999398291111, + "rewards_train/margins": 0.5535042639821768, + "rewards_train/rejected": -0.5702042579650879, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -12.136907577514648, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -2.125, + "logps_train/rejected": -3.208667516708374, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2011907547712326, + "rewards_train/margins": -0.09282400459051132, + "rewards_train/rejected": -0.10836675018072128, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -145.09402465820312, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -180.4552459716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5094025135040283, + "rewards_train/margins": 2.636121988296509, + "rewards_train/rejected": -4.145524501800537, + "step": 572 + }, + { + "epoch": 0.16, + "logps_train/chosen": -47.910701751708984, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -31.454315185546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4910701811313629, + "rewards_train/margins": -0.2456386536359787, + "rewards_train/rejected": -0.24543152749538422, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -17.240478515625, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -27.069313049316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12404785305261612, + "rewards_train/margins": 1.5516334995627403, + "rewards_train/rejected": -1.6756813526153564, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -45.32314682006836, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -42.24648666381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3073147535324097, + "rewards_train/margins": 0.3548339605331421, + "rewards_train/rejected": -1.6621487140655518, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -128.95037841796875, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -147.56771850585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.195037841796875, + "rewards_train/margins": 0.06173408031463623, + "rewards_train/rejected": -1.2567719221115112, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -7.625657081604004, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -11.497819900512695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1469407081604004, + "rewards_train/margins": 0.24659129977226257, + "rewards_train/rejected": -0.39353200793266296, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -104.603759765625, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -185.88433837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4103760719299316, + "rewards_train/margins": 5.828057765960693, + "rewards_train/rejected": -8.238433837890625, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -144.6147003173828, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -180.32675170898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.011470079421997, + "rewards_train/margins": 4.37120509147644, + "rewards_train/rejected": -6.3826751708984375, + "step": 573 + }, + { + "epoch": 0.16, + "logps_train/chosen": -7.802732944488525, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -9.466943740844727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20214830338954926, + "rewards_train/margins": 0.1195460706949234, + "rewards_train/rejected": -0.32169437408447266, + "step": 573 + }, + { + "epoch": 0.16, + "learning_rate": 1.8287498068723347e-06, + "loss": 0.4551, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.142693519592285, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -12.727890014648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20489434897899628, + "rewards_train/margins": 0.11789466440677643, + "rewards_train/rejected": -0.3227890133857727, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -187.461669921875, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -132.9498291015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.246167182922363, + "rewards_train/margins": -0.9511842727661133, + "rewards_train/rejected": -4.29498291015625, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -177.91224670410156, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -162.56646728515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0912246704101562, + "rewards_train/margins": -0.23457789421081543, + "rewards_train/rejected": -1.8566467761993408, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -5.89292049407959, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -1.8671875, + "logps_train/rejected": -1.0825388431549072, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06695795059204102, + "rewards_train/margins": -0.011506915092468262, + "rewards_train/rejected": 0.07846486568450928, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.700660228729248, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -27.194629669189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3716285228729248, + "rewards_train/margins": 0.3478344678878784, + "rewards_train/rejected": -0.7194629907608032, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -238.68402099609375, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -142.14974975585938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.168402194976807, + "rewards_train/margins": -3.403427243232727, + "rewards_train/rejected": -1.7649749517440796, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -0.5386379957199097, + "logps_train/ref_chosen": -0.87890625, + "logps_train/ref_rejected": -0.87890625, + "logps_train/rejected": -0.5653092265129089, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03402682766318321, + "rewards_train/margins": 0.002667125314474106, + "rewards_train/rejected": 0.031359702348709106, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -69.70153045654297, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -115.86410522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0201530456542969, + "rewards_train/margins": 0.5162575244903564, + "rewards_train/rejected": -1.5364105701446533, + "step": 574 + }, + { + "epoch": 0.16, + "logps_train/chosen": -116.90788269042969, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -127.82291412353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.790788412094116, + "rewards_train/margins": 0.24150300025939941, + "rewards_train/rejected": -3.0322914123535156, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -13.530677795410156, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -47.1488151550293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1781822293996811, + "rewards_train/margins": 1.0180637687444687, + "rewards_train/rejected": -0.8398815393447876, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -24.865234375, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -1.0703125, + "logps_train/rejected": -10.964773178100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17402343451976776, + "rewards_train/margins": 0.8154226690530777, + "rewards_train/rejected": -0.9894461035728455, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -65.92166137695312, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -80.03541564941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00783386267721653, + "rewards_train/margins": 0.6113754156976938, + "rewards_train/rejected": -0.6035415530204773, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.573170185089111, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -16.215404510498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09012951701879501, + "rewards_train/margins": 0.5876609459519386, + "rewards_train/rejected": -0.6777904629707336, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -5.790999889373779, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -29.7517147064209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10215001553297043, + "rewards_train/margins": 1.0023214742541313, + "rewards_train/rejected": -0.9001714587211609, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -164.56436157226562, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -185.08595275878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9564361572265625, + "rewards_train/margins": 0.552159309387207, + "rewards_train/rejected": -4.5085954666137695, + "step": 575 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.858917236328125, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -6.191863059997559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05776672437787056, + "rewards_train/margins": 0.16298208758234978, + "rewards_train/rejected": -0.22074881196022034, + "step": 575 + }, + { + "epoch": 0.16, + "learning_rate": 1.8272664066270894e-06, + "loss": 0.7547, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -108.68048095703125, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -97.4793701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11804809421300888, + "rewards_train/margins": 0.7798889055848122, + "rewards_train/rejected": -0.897936999797821, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -93.18134307861328, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -93.59907531738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5681343078613281, + "rewards_train/margins": 0.04177325963973999, + "rewards_train/rejected": -0.6099075675010681, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -97.22071838378906, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -183.324462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0220718383789062, + "rewards_train/margins": 3.410374641418457, + "rewards_train/rejected": -5.432446479797363, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -7.472857475280762, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -10.339038848876953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09728574752807617, + "rewards_train/margins": -0.1883818656206131, + "rewards_train/rejected": 0.09109611809253693, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.901693820953369, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -17.986358642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21673189103603363, + "rewards_train/margins": 0.30065400898456573, + "rewards_train/rejected": -0.5173859000205994, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -110.73233032226562, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -161.97824096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.073233127593994, + "rewards_train/margins": 0.7245910167694092, + "rewards_train/rejected": -2.7978241443634033, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -156.03604125976562, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -87.45574188232422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6036041975021362, + "rewards_train/margins": -0.40803003311157227, + "rewards_train/rejected": -1.195574164390564, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -61.017234802246094, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -62.43442153930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1517234891653061, + "rewards_train/margins": 1.366718664765358, + "rewards_train/rejected": -1.518442153930664, + "step": 576 + }, + { + "epoch": 0.16, + "logps_train/chosen": -39.51788330078125, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -34.61697006225586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.601788341999054, + "rewards_train/margins": -0.22759133577346802, + "rewards_train/rejected": -0.37419700622558594, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -137.22991943359375, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -214.79925537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8729920387268066, + "rewards_train/margins": 2.0069336891174316, + "rewards_train/rejected": -4.879925727844238, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -20.209033966064453, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -30.293413162231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14590339362621307, + "rewards_train/margins": 0.6334379464387894, + "rewards_train/rejected": -0.7793413400650024, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -3.429377555847168, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -4.9563517570495605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22581224143505096, + "rewards_train/margins": 0.28707241639494896, + "rewards_train/rejected": -0.061260174959897995, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -193.3279571533203, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -171.7471160888672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.932795763015747, + "rewards_train/margins": -1.3580840826034546, + "rewards_train/rejected": -1.5747116804122925, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -143.84616088867188, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -164.87838745117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8846161365509033, + "rewards_train/margins": 4.253222703933716, + "rewards_train/rejected": -6.137838840484619, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -14.240453720092773, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -29.070064544677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2634546458721161, + "rewards_train/margins": 1.2454611361026764, + "rewards_train/rejected": -0.9820064902305603, + "step": 577 + }, + { + "epoch": 0.16, + "logps_train/chosen": -167.1781005859375, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -206.01068115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.917809963226318, + "rewards_train/margins": 0.4832582473754883, + "rewards_train/rejected": -5.401068210601807, + "step": 577 + }, + { + "epoch": 0.16, + "learning_rate": 1.8257772163940993e-06, + "loss": 0.5142, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -71.178955078125, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -89.1991958618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6678955554962158, + "rewards_train/margins": 0.3520240783691406, + "rewards_train/rejected": -2.0199196338653564, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -100.84182739257812, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -144.45599365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0341827869415283, + "rewards_train/margins": 1.4614167213439941, + "rewards_train/rejected": -2.4955995082855225, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -84.8973388671875, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -98.892822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.739733874797821, + "rewards_train/margins": 0.29954832792282104, + "rewards_train/rejected": -1.039282202720642, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -20.340198516845703, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -20.86165428161621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08401985466480255, + "rewards_train/margins": 0.4958956092596054, + "rewards_train/rejected": -0.579915463924408, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -129.06988525390625, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -227.20169067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.006988525390625, + "rewards_train/margins": 1.1131806373596191, + "rewards_train/rejected": -4.120169162750244, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.068608283996582, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -57.592159271240234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.39436084032058716, + "rewards_train/margins": -0.16014491021633148, + "rewards_train/rejected": -0.23421593010425568, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -22.554847717285156, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -22.186887741088867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8804847598075867, + "rewards_train/margins": -0.036795973777770996, + "rewards_train/rejected": -0.8436887860298157, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -3.598994016647339, + "logps_train/ref_chosen": -1.921875, + "logps_train/ref_rejected": -1.921875, + "logps_train/rejected": -3.6222050189971924, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16771189868450165, + "rewards_train/margins": 0.002321109175682068, + "rewards_train/rejected": -0.17003300786018372, + "step": 578 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.8441743850708, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -14.349136352539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5030825734138489, + "rewards_train/margins": 1.2036212086677551, + "rewards_train/rejected": -0.7005386352539062, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -80.79571533203125, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -95.27375793457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.029571533203125, + "rewards_train/margins": 0.5978043079376221, + "rewards_train/rejected": -1.627375841140747, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -107.26801300048828, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -178.8642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.076801300048828, + "rewards_train/margins": 3.709624767303467, + "rewards_train/rejected": -6.786426067352295, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -127.41539764404297, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -161.16981506347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9415397644042969, + "rewards_train/margins": 3.025441884994507, + "rewards_train/rejected": -3.9669816493988037, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -69.42447662353516, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -57.046470642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4924476742744446, + "rewards_train/margins": 0.062199413776397705, + "rewards_train/rejected": -0.5546470880508423, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -73.90876007080078, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -37.39535903930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20912399888038635, + "rewards_train/margins": 1.6111599504947662, + "rewards_train/rejected": -1.4020359516143799, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.556046485900879, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -11.601868629455566, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06314535439014435, + "rewards_train/margins": -0.08916778862476349, + "rewards_train/rejected": 0.15231314301490784, + "step": 579 + }, + { + "epoch": 0.16, + "logps_train/chosen": -24.779438018798828, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -79.51216125488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8841938376426697, + "rewards_train/margins": -0.3329777121543884, + "rewards_train/rejected": -0.5512161254882812, + "step": 579 + }, + { + "epoch": 0.16, + "learning_rate": 1.8242822465961175e-06, + "loss": 0.4661, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -123.36346435546875, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -309.9546813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6863465309143066, + "rewards_train/margins": 7.809121608734131, + "rewards_train/rejected": -10.495468139648438, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -11.380996704101562, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -20.332050323486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11934967339038849, + "rewards_train/margins": 0.16385535895824432, + "rewards_train/rejected": -0.2832050323486328, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.74625301361084, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -20.694103240966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20662470161914825, + "rewards_train/margins": 1.119785025715828, + "rewards_train/rejected": -0.9131603240966797, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -77.7427978515625, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -133.0829620361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2992799282073975, + "rewards_train/margins": 3.559016466140747, + "rewards_train/rejected": -5.8582963943481445, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -210.8619384765625, + "logps_train/ref_chosen": -205.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -173.5454864501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.586193859577179, + "rewards_train/margins": 2.268354833126068, + "rewards_train/rejected": -2.854548692703247, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -94.48011779785156, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -190.17372131347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29801177978515625, + "rewards_train/margins": 3.519360303878784, + "rewards_train/rejected": -3.8173720836639404, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -53.32636260986328, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -35.88958740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4576362371444702, + "rewards_train/margins": 0.006322503089904785, + "rewards_train/rejected": -1.463958740234375, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -101.68565368652344, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -140.57809448242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8185653686523438, + "rewards_train/margins": 5.689244270324707, + "rewards_train/rejected": -6.507809638977051, + "step": 580 + }, + { + "epoch": 0.16, + "logps_train/chosen": -58.77190017700195, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -105.78245544433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29780998826026917, + "rewards_train/margins": 0.7760555446147919, + "rewards_train/rejected": -0.4782455563545227, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -26.486339569091797, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -31.019617080688477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07363396137952805, + "rewards_train/margins": 0.0033277496695518494, + "rewards_train/rejected": -0.0769617110490799, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -1.3058691024780273, + "logps_train/ref_chosen": -0.98828125, + "logps_train/ref_rejected": -1.5859375, + "logps_train/rejected": -1.1006650924682617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.031758785247802734, + "rewards_train/margins": -0.08028602600097656, + "rewards_train/rejected": 0.04852724075317383, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -50.28875732421875, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -10.44117546081543, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.228875756263733, + "rewards_train/margins": -0.906633198261261, + "rewards_train/rejected": -0.3222425580024719, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -91.96869659423828, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -91.35730743408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.046869661659002304, + "rewards_train/margins": 1.5888610817492008, + "rewards_train/rejected": -1.6357307434082031, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -14.660894393920898, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -5.181339263916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.42141056060791016, + "rewards_train/margins": 0.4707944877445698, + "rewards_train/rejected": -0.04938392713665962, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -92.55376434326172, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -162.67495727539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.055376410484314, + "rewards_train/margins": 3.212119221687317, + "rewards_train/rejected": -4.267495632171631, + "step": 581 + }, + { + "epoch": 0.16, + "logps_train/chosen": -97.63471984863281, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -167.73251342773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.813472032546997, + "rewards_train/margins": 4.40977931022644, + "rewards_train/rejected": -6.2232513427734375, + "step": 581 + }, + { + "epoch": 0.16, + "learning_rate": 1.8227815076963471e-06, + "loss": 0.3451, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -167.4873809814453, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -93.43574523925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9487380981445312, + "rewards_train/margins": -1.5551635026931763, + "rewards_train/rejected": -1.393574595451355, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -156.57382202148438, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -79.0716552734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.307382345199585, + "rewards_train/margins": -2.200216770172119, + "rewards_train/rejected": -1.1071655750274658, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.980135202407837, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -1.046875, + "logps_train/rejected": -1.3376076221466064, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014486479572951794, + "rewards_train/margins": 0.04355974216014147, + "rewards_train/rejected": -0.029073262587189674, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -116.01502990722656, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -141.9156494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15150299668312073, + "rewards_train/margins": 1.3900619447231293, + "rewards_train/rejected": -1.54156494140625, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -140.3797149658203, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -80.18495178222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5379714965820312, + "rewards_train/margins": -1.519476294517517, + "rewards_train/rejected": -1.0184952020645142, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -9.804422378540039, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -9.726543426513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08205776661634445, + "rewards_train/margins": 0.4015871211886406, + "rewards_train/rejected": -0.31952935457229614, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -130.13275146484375, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -172.86932373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2132751941680908, + "rewards_train/margins": 6.7736570835113525, + "rewards_train/rejected": -7.986932277679443, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.734275817871094, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -7.625541687011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19217757880687714, + "rewards_train/margins": 0.12975160777568817, + "rewards_train/rejected": -0.3219291865825653, + "step": 582 + }, + { + "epoch": 0.16, + "logps_train/chosen": -151.78199768066406, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -167.62673950195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.378199815750122, + "rewards_train/margins": 1.684474229812622, + "rewards_train/rejected": -4.062674045562744, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -48.174766540527344, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -151.4647216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3924766480922699, + "rewards_train/margins": 4.253995805978775, + "rewards_train/rejected": -4.646472454071045, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -0.20073899626731873, + "logps_train/ref_chosen": -0.3125, + "logps_train/ref_rejected": -0.3125, + "logps_train/rejected": -0.1994805634021759, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.011176100932061672, + "rewards_train/margins": -0.0001258431002497673, + "rewards_train/rejected": 0.01130194403231144, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -100.4238052368164, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -145.17532348632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7923805713653564, + "rewards_train/margins": 2.975151777267456, + "rewards_train/rejected": -4.7675323486328125, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.491767883300781, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -10.191709518432617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.088323213160038, + "rewards_train/margins": 0.4949941709637642, + "rewards_train/rejected": -0.4066709578037262, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -33.534523010253906, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -14.743907928466797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8284522891044617, + "rewards_train/margins": -0.08218646049499512, + "rewards_train/rejected": -0.7462658286094666, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -122.68464660644531, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -205.1373291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4684646129608154, + "rewards_train/margins": 5.595268487930298, + "rewards_train/rejected": -8.063733100891113, + "step": 583 + }, + { + "epoch": 0.16, + "logps_train/chosen": -11.97709846496582, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -36.9691162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27895984053611755, + "rewards_train/margins": 0.5304518043994904, + "rewards_train/rejected": -0.8094116449356079, + "step": 583 + }, + { + "epoch": 0.16, + "learning_rate": 1.8212750101983692e-06, + "loss": 0.6508, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -182.02114868164062, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -142.51429748535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.50211501121521, + "rewards_train/margins": 0.04931473731994629, + "rewards_train/rejected": -3.5514297485351562, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -65.63801574707031, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -65.48957824707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.188801571726799, + "rewards_train/margins": 1.3851562291383743, + "rewards_train/rejected": -1.5739578008651733, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -10.631436347961426, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -37.041465759277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0756436362862587, + "rewards_train/margins": 0.8785029277205467, + "rewards_train/rejected": -0.9541465640068054, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -153.28524780273438, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -164.906494140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.978524684906006, + "rewards_train/margins": -0.6378750801086426, + "rewards_train/rejected": -5.340649604797363, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -21.91991424560547, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -4.983386993408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.054491426795721054, + "rewards_train/margins": -0.03115272708237171, + "rewards_train/rejected": -0.023338699713349342, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -3.0934605598449707, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -4.4901814460754395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06877894699573517, + "rewards_train/margins": 0.09592209197580814, + "rewards_train/rejected": -0.027143144980072975, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -7.7430548667907715, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -15.681138038635254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.11319451779127121, + "rewards_train/margins": -0.08119168132543564, + "rewards_train/rejected": 0.19438619911670685, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -16.856901168823242, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -38.257354736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3544401228427887, + "rewards_train/margins": 0.09629535675048828, + "rewards_train/rejected": -0.450735479593277, + "step": 584 + }, + { + "epoch": 0.16, + "logps_train/chosen": -105.3893814086914, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -98.85247039794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2889381647109985, + "rewards_train/margins": 0.7463089227676392, + "rewards_train/rejected": -2.0352470874786377, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -0.058296505361795425, + "logps_train/ref_chosen": -0.08349609375, + "logps_train/ref_rejected": -0.08349609375, + "logps_train/rejected": -0.08387906104326248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002519958885386586, + "rewards_train/margins": 0.00255825561544043, + "rewards_train/rejected": -3.829673005384393e-05, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -199.30081176757812, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -132.26406860351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.030081272125244, + "rewards_train/margins": -0.9036743640899658, + "rewards_train/rejected": -2.1264069080352783, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -99.87532043457031, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -117.05512237548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.637532114982605, + "rewards_train/margins": 0.6679800748825073, + "rewards_train/rejected": -2.3055121898651123, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -111.17460632324219, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -98.13581848144531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8674606680870056, + "rewards_train/margins": -0.2538788318634033, + "rewards_train/rejected": -0.6135818362236023, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -17.401805877685547, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -26.04789924621582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14731942117214203, + "rewards_train/margins": 0.26460934430360794, + "rewards_train/rejected": -0.11728992313146591, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -109.15191650390625, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -128.40997314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.315191626548767, + "rewards_train/margins": -0.12419426441192627, + "rewards_train/rejected": -1.1909973621368408, + "step": 585 + }, + { + "epoch": 0.16, + "logps_train/chosen": -20.34429168701172, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -19.487350463867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18442916870117188, + "rewards_train/margins": 0.05180588364601135, + "rewards_train/rejected": -0.23623505234718323, + "step": 585 + }, + { + "epoch": 0.16, + "learning_rate": 1.8197627646460695e-06, + "loss": 0.6625, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.545200347900391, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -5.627640247344971, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04547996446490288, + "rewards_train/margins": 0.017618989571928978, + "rewards_train/rejected": 0.0278609748929739, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.919580459594727, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -1.46875, + "logps_train/rejected": -3.191013813018799, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.023208046332001686, + "rewards_train/margins": 0.14901833795011044, + "rewards_train/rejected": -0.17222638428211212, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -25.690332412719727, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -16.614599227905273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2940332591533661, + "rewards_train/margins": 0.029926657676696777, + "rewards_train/rejected": -0.32395991683006287, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -97.95988464355469, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -199.386962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3459885120391846, + "rewards_train/margins": 7.292708158493042, + "rewards_train/rejected": -8.638696670532227, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -27.249954223632812, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -133.48388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8999954462051392, + "rewards_train/margins": 3.498393416404724, + "rewards_train/rejected": -4.398388862609863, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -46.39638137817383, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -22.912677764892578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9646381735801697, + "rewards_train/margins": -0.19212037324905396, + "rewards_train/rejected": -0.7725178003311157, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -55.498497009277344, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -16.462419509887695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37484970688819885, + "rewards_train/margins": 0.9667048156261444, + "rewards_train/rejected": -1.3415545225143433, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -65.5311050415039, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -58.64360809326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1281105279922485, + "rewards_train/margins": -0.01374971866607666, + "rewards_train/rejected": -1.1143608093261719, + "step": 586 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.659104824066162, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.178713798522949, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.03778548166155815, + "rewards_train/margins": -0.07303910329937935, + "rewards_train/rejected": 0.0352536216378212, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -1.4960651397705078, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -6.4872941970825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2144559919834137, + "rewards_train/margins": 0.7413104474544525, + "rewards_train/rejected": -0.5268544554710388, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -8.893335342407227, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -24.502826690673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4674585461616516, + "rewards_train/margins": 0.39532411098480225, + "rewards_train/rejected": -0.8627826571464539, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -16.01416778564453, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -1.515625, + "logps_train/rejected": -3.644742012023926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3485832214355469, + "rewards_train/margins": 0.5614949315786362, + "rewards_train/rejected": -0.2129117101430893, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -22.264497756958008, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -15.385157585144043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4264497756958008, + "rewards_train/margins": 0.08081597089767456, + "rewards_train/rejected": -0.5072657465934753, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -18.19353485107422, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -17.512388229370117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1306465119123459, + "rewards_train/margins": 0.4443853348493576, + "rewards_train/rejected": -0.3137388229370117, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -5.4713215827941895, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -16.671100616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0877571627497673, + "rewards_train/margins": 0.3856028988957405, + "rewards_train/rejected": -0.4733600616455078, + "step": 587 + }, + { + "epoch": 0.16, + "logps_train/chosen": -5.674115180969238, + "logps_train/ref_chosen": -2.359375, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -9.270748138427734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.33147403597831726, + "rewards_train/margins": -0.2043992131948471, + "rewards_train/rejected": -0.12707482278347015, + "step": 587 + }, + { + "epoch": 0.16, + "learning_rate": 1.8182447816235634e-06, + "loss": 0.524, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -61.62096405029297, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -100.18085479736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28709641098976135, + "rewards_train/margins": 0.5309890806674957, + "rewards_train/rejected": -0.8180854916572571, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -312.43359375, + "logps_train/ref_chosen": -280.0, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -56.81493377685547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.243359327316284, + "rewards_train/margins": -3.0618659406900406, + "rewards_train/rejected": -0.1814933866262436, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -153.6412811279297, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -153.28646850585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9641281366348267, + "rewards_train/margins": 3.1145190000534058, + "rewards_train/rejected": -5.078647136688232, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -107.47195434570312, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -183.6832275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5471954345703125, + "rewards_train/margins": 0.9211273193359375, + "rewards_train/rejected": -1.46832275390625, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -90.99237060546875, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -103.88079833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20076294243335724, + "rewards_train/margins": 2.3888427764177322, + "rewards_train/rejected": -2.188079833984375, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -131.17788696289062, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -211.91036987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7677887678146362, + "rewards_train/margins": 3.2232481241226196, + "rewards_train/rejected": -4.991036891937256, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -17.200408935546875, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -5.106640338897705, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16379089653491974, + "rewards_train/margins": -0.1281268633902073, + "rewards_train/rejected": -0.03566403314471245, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -40.36793518066406, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -40.3355598449707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.874293565750122, + "rewards_train/margins": -0.003237605094909668, + "rewards_train/rejected": -1.8710559606552124, + "step": 588 + }, + { + "epoch": 0.16, + "logps_train/chosen": -122.34954833984375, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -120.02745056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.734954833984375, + "rewards_train/margins": 0.4177902936935425, + "rewards_train/rejected": -1.1527451276779175, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.998711109161377, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -20.34276580810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.031378891319036484, + "rewards_train/margins": 0.7781554721295834, + "rewards_train/rejected": -0.7467765808105469, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -99.41020202636719, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -184.53704833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7410202026367188, + "rewards_train/margins": 4.712684631347656, + "rewards_train/rejected": -5.453704833984375, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -6.339602947235107, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -9.36011028289795, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19958530366420746, + "rewards_train/margins": 0.4317382127046585, + "rewards_train/rejected": -0.631323516368866, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -22.47292709350586, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -28.88628387451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6472927331924438, + "rewards_train/margins": 0.14133566617965698, + "rewards_train/rejected": -0.7886283993721008, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -2.6664350032806396, + "logps_train/ref_chosen": -0.44140625, + "logps_train/ref_rejected": -0.44140625, + "logps_train/rejected": -2.657557964324951, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22250287234783173, + "rewards_train/margins": -0.0008876919746398926, + "rewards_train/rejected": -0.22161518037319183, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -128.12887573242188, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -120.80724334716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6628875732421875, + "rewards_train/margins": 0.3678368330001831, + "rewards_train/rejected": -1.0307244062423706, + "step": 589 + }, + { + "epoch": 0.16, + "logps_train/chosen": -0.8489170074462891, + "logps_train/ref_chosen": -0.51953125, + "logps_train/ref_rejected": -31.5, + "logps_train/rejected": -28.433731079101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.032938577234745026, + "rewards_train/margins": -0.3395654633641243, + "rewards_train/rejected": 0.3066268861293793, + "step": 589 + }, + { + "epoch": 0.16, + "learning_rate": 1.8167210717551222e-06, + "loss": 0.6029, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -151.91050720214844, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -232.6745147705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9910507202148438, + "rewards_train/margins": 6.0764007568359375, + "rewards_train/rejected": -7.067451477050781, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -113.99609375, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -138.14952087402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.599609375, + "rewards_train/margins": 3.2153427600860596, + "rewards_train/rejected": -3.8149521350860596, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -107.03512573242188, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -184.46412658691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0535125732421875, + "rewards_train/margins": 4.44290018081665, + "rewards_train/rejected": -6.496412754058838, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -89.83129119873047, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -176.86154174804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4831291139125824, + "rewards_train/margins": 5.703025251626968, + "rewards_train/rejected": -6.186154365539551, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -89.79559326171875, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -192.21029663085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12044067680835724, + "rewards_train/margins": 6.991470530629158, + "rewards_train/rejected": -6.871029853820801, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -99.45894622802734, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -83.82044982910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09589462727308273, + "rewards_train/margins": 0.036150358617305756, + "rewards_train/rejected": -0.1320449858903885, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -4.436766624450684, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -4.200047016143799, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11398916691541672, + "rewards_train/margins": 0.027890540659427643, + "rewards_train/rejected": -0.14187970757484436, + "step": 590 + }, + { + "epoch": 0.16, + "logps_train/chosen": -136.874267578125, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -142.22975158691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9374268054962158, + "rewards_train/margins": 2.3355486392974854, + "rewards_train/rejected": -4.272975444793701, + "step": 590 + }, + { + "epoch": 0.17, + "logps_train/chosen": -108.81242370605469, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -90.89703369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08124237507581711, + "rewards_train/margins": 0.4084610119462013, + "rewards_train/rejected": -0.48970338702201843, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -119.33160400390625, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -151.12527465820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2331604957580566, + "rewards_train/margins": 2.379366874694824, + "rewards_train/rejected": -5.612527370452881, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -20.54189682006836, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -36.5911979675293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9854397177696228, + "rewards_train/margins": 0.7361801266670227, + "rewards_train/rejected": -1.7216198444366455, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -65.78741455078125, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -37.3661003112793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20374146103858948, + "rewards_train/margins": -0.4171314388513565, + "rewards_train/rejected": 0.21338997781276703, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -12.044872283935547, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -27.44923210144043, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5919872522354126, + "rewards_train/margins": -0.25956404209136963, + "rewards_train/rejected": -0.33242321014404297, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -56.45276641845703, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -31.126827239990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27027663588523865, + "rewards_train/margins": 0.9799061119556427, + "rewards_train/rejected": -1.2501827478408813, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -90.07691192626953, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -79.6704330444336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4576911926269531, + "rewards_train/margins": -0.6906478852033615, + "rewards_train/rejected": 0.2329566925764084, + "step": 591 + }, + { + "epoch": 0.17, + "logps_train/chosen": -16.04644012451172, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -9.683812141418457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26714402437210083, + "rewards_train/margins": 0.18248718976974487, + "rewards_train/rejected": -0.4496312141418457, + "step": 591 + }, + { + "epoch": 0.17, + "learning_rate": 1.8151916457051e-06, + "loss": 0.3921, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -153.42469787597656, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -145.96670532226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.942469835281372, + "rewards_train/margins": 0.10420083999633789, + "rewards_train/rejected": -2.04667067527771, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -15.537496566772461, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -24.48567008972168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18499965965747833, + "rewards_train/margins": 1.2635673731565475, + "rewards_train/rejected": -1.4485670328140259, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -23.10165023803711, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -34.832942962646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3226650357246399, + "rewards_train/margins": 0.4981292486190796, + "rewards_train/rejected": -0.8207942843437195, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -8.011493682861328, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -30.03407859802246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06677436828613281, + "rewards_train/margins": 1.274133563041687, + "rewards_train/rejected": -1.3409079313278198, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.3328919410705566, + "logps_train/ref_chosen": -1.65625, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -6.957563877105713, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16766420006752014, + "rewards_train/margins": 0.012467190623283386, + "rewards_train/rejected": -0.18013139069080353, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -129.57501220703125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -153.79095458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.557501196861267, + "rewards_train/margins": 2.221594214439392, + "rewards_train/rejected": -3.779095411300659, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.737478256225586, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -18.462162017822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007502174470573664, + "rewards_train/margins": 0.7974684000946581, + "rewards_train/rejected": -0.7899662256240845, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -9.567733764648438, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -46.0551872253418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3036483824253082, + "rewards_train/margins": 1.2893703877925873, + "rewards_train/rejected": -1.5930187702178955, + "step": 592 + }, + { + "epoch": 0.17, + "logps_train/chosen": -22.569944381713867, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -35.588706970214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16949443519115448, + "rewards_train/margins": 0.41437627375125885, + "rewards_train/rejected": -0.5838707089424133, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.2717764377593994, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -9.106773376464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13532236218452454, + "rewards_train/margins": 0.6053746938705444, + "rewards_train/rejected": -0.4700523316860199, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -121.24362182617188, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -23.24471092224121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0756378173828125, + "rewards_train/margins": 1.8532339334487915, + "rewards_train/rejected": -1.777596116065979, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.54002046585083, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -9.639698028564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21337704360485077, + "rewards_train/margins": 0.14434276521205902, + "rewards_train/rejected": -0.3577198088169098, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.5097222328186035, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -8.06694221496582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02715277671813965, + "rewards_train/margins": 0.01509699784219265, + "rewards_train/rejected": 0.012055778875946999, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -210.16220092773438, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -195.0078887939453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.0162200927734375, + "rewards_train/margins": -0.8154311180114746, + "rewards_train/rejected": -3.200788974761963, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.5359673500061035, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -11.174185752868652, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008903264999389648, + "rewards_train/margins": 0.49194684624671936, + "rewards_train/rejected": -0.4830435812473297, + "step": 593 + }, + { + "epoch": 0.17, + "logps_train/chosen": -24.82987403869629, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -23.440711975097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11701259762048721, + "rewards_train/margins": 0.4985838010907173, + "rewards_train/rejected": -0.3815712034702301, + "step": 593 + }, + { + "epoch": 0.17, + "learning_rate": 1.8136565141778571e-06, + "loss": 0.4718, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -40.92516326904297, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -16.649534225463867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4550163745880127, + "rewards_train/margins": -0.8213129639625549, + "rewards_train/rejected": -0.6337034106254578, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -87.07852172851562, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -220.0, + "logps_train/rejected": -255.97518920898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5578522086143494, + "rewards_train/margins": 3.039666712284088, + "rewards_train/rejected": -3.5975189208984375, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -182.66622924804688, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -127.69328308105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5666229724884033, + "rewards_train/margins": 0.25270533561706543, + "rewards_train/rejected": -2.8193283081054688, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -118.94107818603516, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -114.16944122314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6941078901290894, + "rewards_train/margins": 0.5228363275527954, + "rewards_train/rejected": -2.2169442176818848, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -83.69038391113281, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -156.8051300048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7190383672714233, + "rewards_train/margins": 5.111474633216858, + "rewards_train/rejected": -6.830513000488281, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -227.52474975585938, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -228.36978149414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0524749755859375, + "rewards_train/margins": 2.084503173828125, + "rewards_train/rejected": -5.1369781494140625, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -46.51551055908203, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -18.94013023376465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1265510320663452, + "rewards_train/margins": -0.04503798484802246, + "rewards_train/rejected": -1.0815130472183228, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -38.571102142333984, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -47.61796951293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7821102142333984, + "rewards_train/margins": 0.504686713218689, + "rewards_train/rejected": -1.2867969274520874, + "step": 594 + }, + { + "epoch": 0.17, + "logps_train/chosen": -34.0596923828125, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -39.935726165771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05596923828125, + "rewards_train/margins": 0.18760338425636292, + "rewards_train/rejected": -0.24357262253761292, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -11.242573738098145, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -13.397204399108887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16800737380981445, + "rewards_train/margins": 0.26546308398246765, + "rewards_train/rejected": -0.4334704577922821, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -64.07203674316406, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -111.59732818603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6072036623954773, + "rewards_train/margins": 0.05252915620803833, + "rewards_train/rejected": -0.6597328186035156, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -24.64852523803711, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -43.145721435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8148525357246399, + "rewards_train/margins": 1.0372195839881897, + "rewards_train/rejected": -1.8520721197128296, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -1.9478706121444702, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -3.1875, + "logps_train/rejected": -3.012071132659912, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03021293878555298, + "rewards_train/margins": 0.01267005130648613, + "rewards_train/rejected": 0.01754288747906685, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -10.024003982543945, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -8.69282341003418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23365040123462677, + "rewards_train/margins": -0.07061806321144104, + "rewards_train/rejected": -0.16303233802318573, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -8.94266128540039, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -9.504169464111328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2192661315202713, + "rewards_train/margins": -0.02822418510913849, + "rewards_train/rejected": -0.1910419464111328, + "step": 595 + }, + { + "epoch": 0.17, + "logps_train/chosen": -17.13677978515625, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -24.53068733215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.301177978515625, + "rewards_train/margins": 0.05189076066017151, + "rewards_train/rejected": -0.3530687391757965, + "step": 595 + }, + { + "epoch": 0.17, + "learning_rate": 1.8121156879176872e-06, + "loss": 0.5326, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -93.1617660522461, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -77.82597351074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9661766290664673, + "rewards_train/margins": 0.41642069816589355, + "rewards_train/rejected": -1.3825973272323608, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -17.987171173095703, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -25.153276443481445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18621711432933807, + "rewards_train/margins": 1.0228606015443802, + "rewards_train/rejected": -1.2090777158737183, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -84.22346496582031, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -168.2151641845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5223464965820312, + "rewards_train/margins": 3.9491701126098633, + "rewards_train/rejected": -4.4715166091918945, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -67.34371948242188, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -106.99421691894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.284371942281723, + "rewards_train/margins": 0.3650497496128082, + "rewards_train/rejected": -0.6494216918945312, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.852511405944824, + "logps_train/ref_chosen": -1.8515625, + "logps_train/ref_rejected": -1.2265625, + "logps_train/rejected": -4.025731563568115, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20009489357471466, + "rewards_train/margins": 0.07982201874256134, + "rewards_train/rejected": -0.279916912317276, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -35.722442626953125, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -90.05999755859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5222442746162415, + "rewards_train/margins": -1.0162445306777954, + "rewards_train/rejected": 0.49400025606155396, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -86.81626892089844, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -120.9139404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8816269040107727, + "rewards_train/margins": 0.8097671866416931, + "rewards_train/rejected": -1.6913940906524658, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -77.13880157470703, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -206.03610229492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.563880205154419, + "rewards_train/margins": 5.8397300243377686, + "rewards_train/rejected": -7.4036102294921875, + "step": 596 + }, + { + "epoch": 0.17, + "logps_train/chosen": -102.31062316894531, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -140.67779541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2189376801252365, + "rewards_train/margins": 0.686717227101326, + "rewards_train/rejected": -0.4677795469760895, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -131.61874389648438, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -20.79926300048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8118743896484375, + "rewards_train/margins": -0.3256980776786804, + "rewards_train/rejected": -0.4861763119697571, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.48020631074905396, + "logps_train/ref_chosen": -0.283203125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -14.254090309143066, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019700318574905396, + "rewards_train/margins": 0.7900837361812592, + "rewards_train/rejected": -0.8097840547561646, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -213.13034057617188, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -239.86679077148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.213034152984619, + "rewards_train/margins": 1.2736449241638184, + "rewards_train/rejected": -5.4866790771484375, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -56.43042755126953, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -34.13936233520508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0680427551269531, + "rewards_train/margins": -0.29160648584365845, + "rewards_train/rejected": -0.7764362692832947, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -36.98060989379883, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -38.46192169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5230609774589539, + "rewards_train/margins": 1.260631263256073, + "rewards_train/rejected": -1.7836922407150269, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -197.92041015625, + "logps_train/ref_chosen": -194.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -160.30657958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39204102754592896, + "rewards_train/margins": 0.3386169672012329, + "rewards_train/rejected": -0.7306579947471619, + "step": 597 + }, + { + "epoch": 0.17, + "logps_train/chosen": -151.98245239257812, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -204.79705810546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.848245143890381, + "rewards_train/margins": -0.06853914260864258, + "rewards_train/rejected": -7.779706001281738, + "step": 597 + }, + { + "epoch": 0.17, + "learning_rate": 1.8105691777087398e-06, + "loss": 0.4985, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -128.0218505859375, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -231.47512817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.502185106277466, + "rewards_train/margins": 2.4453279972076416, + "rewards_train/rejected": -4.947513103485107, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.357367992401123, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -1.3203125, + "logps_train/rejected": -1.6203985214233398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01073679979890585, + "rewards_train/margins": 0.019271803088486195, + "rewards_train/rejected": -0.030008602887392044, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.131855487823486, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -21.218124389648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41318556666374207, + "rewards_train/margins": -0.003873109817504883, + "rewards_train/rejected": -0.4093124568462372, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -21.596485137939453, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -21.429615020751953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3096485137939453, + "rewards_train/margins": -0.016687005758285522, + "rewards_train/rejected": -0.2929615080356598, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -22.854293823242188, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -20.767995834350586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3104293942451477, + "rewards_train/margins": 0.14137020707130432, + "rewards_train/rejected": -0.451799601316452, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -149.3683319091797, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -215.67037963867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7868332862854004, + "rewards_train/margins": 3.7802047729492188, + "rewards_train/rejected": -7.567038059234619, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.998485088348389, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -29.373836517333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25609850883483887, + "rewards_train/margins": 0.7812851667404175, + "rewards_train/rejected": -1.0373836755752563, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -131.0603790283203, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -158.61370849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1060378551483154, + "rewards_train/margins": 0.9553329944610596, + "rewards_train/rejected": -3.061370849609375, + "step": 598 + }, + { + "epoch": 0.17, + "logps_train/chosen": -124.13658142089844, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -138.97108459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.263658285140991, + "rewards_train/margins": 0.13345026969909668, + "rewards_train/rejected": -2.397108554840088, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -44.670745849609375, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -47.15884017944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5170745849609375, + "rewards_train/margins": 1.0738095045089722, + "rewards_train/rejected": -1.5908840894699097, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.580004692077637, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -27.887279510498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30800047516822815, + "rewards_train/margins": 0.16822749376296997, + "rewards_train/rejected": -0.4762279689311981, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -19.543190002441406, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -18.108144760131836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8230690360069275, + "rewards_train/margins": -0.24350452423095703, + "rewards_train/rejected": -0.5795645117759705, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -161.59349060058594, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -209.0, + "logps_train/rejected": -242.41845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9593490362167358, + "rewards_train/margins": 1.38249671459198, + "rewards_train/rejected": -3.341845750808716, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -1.624814510345459, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -28.0659236907959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04689354822039604, + "rewards_train/margins": 1.0909859649837017, + "rewards_train/rejected": -1.0440924167633057, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -99.3221435546875, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -85.29692840576172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.08221435546875, + "rewards_train/margins": -0.22752141952514648, + "rewards_train/rejected": -2.8546929359436035, + "step": 599 + }, + { + "epoch": 0.17, + "logps_train/chosen": -30.72913360595703, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -21.4049015045166, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.71041339635849, + "rewards_train/margins": -0.10117322206497192, + "rewards_train/rejected": -0.6092401742935181, + "step": 599 + }, + { + "epoch": 0.17, + "learning_rate": 1.8090169943749474e-06, + "loss": 0.4964, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -18.210418701171875, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -18.285175323486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.339791864156723, + "rewards_train/margins": 0.007475674152374268, + "rewards_train/rejected": -0.3472675383090973, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -8.880615234375, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -13.056666374206543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10993652790784836, + "rewards_train/margins": 0.601980097591877, + "rewards_train/rejected": -0.7119166254997253, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": 0.0, + "logps_train/ref_chosen": 0.0, + "logps_train/ref_rejected": 0.0, + "logps_train/rejected": 0.0, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0, + "rewards_train/margins": 0.0, + "rewards_train/rejected": 0.0, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -81.91292572021484, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -170.96707153320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6912925839424133, + "rewards_train/margins": 5.255414664745331, + "rewards_train/rejected": -5.946707248687744, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -11.57558536529541, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -7.736057758331299, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12630854547023773, + "rewards_train/margins": 0.14104722440242767, + "rewards_train/rejected": -0.2673557698726654, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -180.61846923828125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -177.41807556152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.361846923828125, + "rewards_train/margins": -0.22003936767578125, + "rewards_train/rejected": -4.141807556152344, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.0805745124816895, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -4.642196178436279, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.033057451248168945, + "rewards_train/margins": 0.043662168085575104, + "rewards_train/rejected": -0.07671961933374405, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -123.63818359375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -210.06231689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.613818347454071, + "rewards_train/margins": 2.8924134373664856, + "rewards_train/rejected": -3.5062317848205566, + "step": 600 + }, + { + "epoch": 0.17, + "logps_train/chosen": -91.05400085449219, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -118.48346710205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35540008544921875, + "rewards_train/margins": 2.392946720123291, + "rewards_train/rejected": -2.7483468055725098, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -65.72877502441406, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -65.32379150390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.07712250202894211, + "rewards_train/margins": -0.04049834609031677, + "rewards_train/rejected": 0.11762084811925888, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -139.7975311279297, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -196.79287719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1797531098127365, + "rewards_train/margins": 4.999534800648689, + "rewards_train/rejected": -5.179287910461426, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.0662407875061035, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -2.71875, + "logps_train/rejected": -1.8777743577957153, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05349908024072647, + "rewards_train/margins": -0.13759664446115494, + "rewards_train/rejected": 0.08409756422042847, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -114.86924743652344, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -98.21205139160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2369247674942017, + "rewards_train/margins": 0.5842803716659546, + "rewards_train/rejected": -1.8212051391601562, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -60.735023498535156, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -23.823638916015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5485023856163025, + "rewards_train/margins": -0.25363847613334656, + "rewards_train/rejected": -0.29486390948295593, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -149.3015594482422, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -236.71458435058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2301559448242188, + "rewards_train/margins": 1.44130277633667, + "rewards_train/rejected": -4.671458721160889, + "step": 601 + }, + { + "epoch": 0.17, + "logps_train/chosen": -15.036735534667969, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -21.135995864868164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5661735534667969, + "rewards_train/margins": -0.165073961019516, + "rewards_train/rejected": -0.4010995924472809, + "step": 601 + }, + { + "epoch": 0.17, + "learning_rate": 1.8074591487799472e-06, + "loss": 0.4882, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -58.359886169433594, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -60.585060119628906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1109886169433594, + "rewards_train/margins": -0.9774825572967529, + "rewards_train/rejected": -1.1335060596466064, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -107.15130615234375, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -124.58894348144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46513062715530396, + "rewards_train/margins": 2.4437636733055115, + "rewards_train/rejected": -2.9088943004608154, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -86.52699279785156, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -87.31814575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6026992797851562, + "rewards_train/margins": 0.47911536693573, + "rewards_train/rejected": -1.0818146467208862, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -81.5736083984375, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -109.8331527709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4573608338832855, + "rewards_train/margins": 2.1259545385837555, + "rewards_train/rejected": -2.583315372467041, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -1.3176213502883911, + "logps_train/ref_chosen": -1.3046875, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -6.057416915893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0012933850521221757, + "rewards_train/margins": 0.1388233065372333, + "rewards_train/rejected": -0.14011669158935547, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -170.49456787109375, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -150.04537963867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.29945707321167, + "rewards_train/margins": 1.2550811767578125, + "rewards_train/rejected": -5.554538249969482, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.878087997436523, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -15.679330825805664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07218380272388458, + "rewards_train/margins": 0.10824927687644958, + "rewards_train/rejected": -0.18043307960033417, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -124.59780883789062, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -167.1428985595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4097808599472046, + "rewards_train/margins": 4.104508996009827, + "rewards_train/rejected": -5.514289855957031, + "step": 602 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.381138801574707, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -6.993940830230713, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0743861198425293, + "rewards_train/margins": 0.3612802028656006, + "rewards_train/rejected": -0.2868940830230713, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.737921714782715, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -19.33771514892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.057457830756902695, + "rewards_train/margins": 0.6787293814122677, + "rewards_train/rejected": -0.621271550655365, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -24.574954986572266, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -18.83048439025879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46999549865722656, + "rewards_train/margins": 0.2193029522895813, + "rewards_train/rejected": -0.6892984509468079, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -8.111172676086426, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -1.9296875, + "logps_train/rejected": -8.518170356750488, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.434554785490036, + "rewards_train/margins": 0.22429350018501282, + "rewards_train/rejected": -0.6588482856750488, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -45.12312698364258, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -47.8349723815918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23768730461597443, + "rewards_train/margins": 0.2461845427751541, + "rewards_train/rejected": -0.008497238159179688, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -2.7741281986236572, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -8.225430488586426, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.022725319489836693, + "rewards_train/margins": 0.32481774128973484, + "rewards_train/rejected": -0.34754306077957153, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -16.81406021118164, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -5.942835807800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18140602111816406, + "rewards_train/margins": 0.20194005966186523, + "rewards_train/rejected": -0.3833460807800293, + "step": 603 + }, + { + "epoch": 0.17, + "logps_train/chosen": -24.460052490234375, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -23.792381286621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.258505254983902, + "rewards_train/margins": 0.04573288559913635, + "rewards_train/rejected": -0.30423814058303833, + "step": 603 + }, + { + "epoch": 0.17, + "learning_rate": 1.8058956518270065e-06, + "loss": 0.501, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.5601439476013184, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -26.900833129882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057576894760131836, + "rewards_train/margins": 0.24500641226768494, + "rewards_train/rejected": -0.3025833070278168, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.764002799987793, + "logps_train/ref_chosen": -0.84765625, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -8.277608871459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008365345187485218, + "rewards_train/margins": 0.18612622935324907, + "rewards_train/rejected": -0.17776088416576385, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -193.1000213623047, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -216.13258361816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8100021481513977, + "rewards_train/margins": 1.8032562136650085, + "rewards_train/rejected": -2.6132583618164062, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -132.24497985839844, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -224.95086669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2244980335235596, + "rewards_train/margins": 2.9705889225006104, + "rewards_train/rejected": -6.19508695602417, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -136.84912109375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -197.97119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18491211533546448, + "rewards_train/margins": 6.512207120656967, + "rewards_train/rejected": -6.697119235992432, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.888685703277588, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -8.07026195526123, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22949357330799103, + "rewards_train/margins": 0.45565764605998993, + "rewards_train/rejected": -0.685151219367981, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -209.13482666015625, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -188.18832397460938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.713482618331909, + "rewards_train/margins": -0.2946500778198242, + "rewards_train/rejected": -3.418832540512085, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -172.7660675048828, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -114.37034606933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5766067504882812, + "rewards_train/margins": -1.3895721435546875, + "rewards_train/rejected": -2.1870346069335938, + "step": 604 + }, + { + "epoch": 0.17, + "logps_train/chosen": -9.607786178588867, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -15.764543533325195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17640362679958344, + "rewards_train/margins": 1.0781757980585098, + "rewards_train/rejected": -1.2545794248580933, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -160.19168090820312, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -195.7999267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4191681146621704, + "rewards_train/margins": 3.560824751853943, + "rewards_train/rejected": -4.979992866516113, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -196.1377410888672, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -194.89752197265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.21377420425415, + "rewards_train/margins": -0.12402200698852539, + "rewards_train/rejected": -4.089752197265625, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.676206588745117, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -3.7761032581329346, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0910581573843956, + "rewards_train/margins": 0.03967716544866562, + "rewards_train/rejected": -0.13073532283306122, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -6.528665542602539, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -11.415458679199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32317906618118286, + "rewards_train/margins": 0.22149181365966797, + "rewards_train/rejected": -0.5446708798408508, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -140.7273406982422, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -113.92802429199219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.27273416519165, + "rewards_train/margins": -0.27993178367614746, + "rewards_train/rejected": -3.992802381515503, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -97.0921401977539, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -37.69097900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7592140436172485, + "rewards_train/margins": 0.12238389253616333, + "rewards_train/rejected": -0.8815979361534119, + "step": 605 + }, + { + "epoch": 0.17, + "logps_train/chosen": -139.1346435546875, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -120.31932830810547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.763464331626892, + "rewards_train/margins": -0.8315314650535583, + "rewards_train/rejected": -0.9319328665733337, + "step": 605 + }, + { + "epoch": 0.17, + "learning_rate": 1.8043265144589467e-06, + "loss": 0.5847, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -101.72282409667969, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -143.5074462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3222824037075043, + "rewards_train/margins": 1.92846217751503, + "rewards_train/rejected": -2.250744581222534, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -16.65109634399414, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -31.553932189941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10260963439941406, + "rewards_train/margins": 0.5277835726737976, + "rewards_train/rejected": -0.6303932070732117, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.7688605785369873, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -9.28007984161377, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13626106083393097, + "rewards_train/margins": 0.26987193524837494, + "rewards_train/rejected": -0.4061329960823059, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.49241828918457, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -23.69983673095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21486683189868927, + "rewards_train/margins": 0.3051168769598007, + "rewards_train/rejected": -0.51998370885849, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -87.06108856201172, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -99.15216064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6561088562011719, + "rewards_train/margins": 0.9591071605682373, + "rewards_train/rejected": -2.615216016769409, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -113.38771057128906, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -165.3437957763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7387712001800537, + "rewards_train/margins": 1.1456084251403809, + "rewards_train/rejected": -3.8843796253204346, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -53.32219696044922, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -110.46221923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2927803099155426, + "rewards_train/margins": 0.8890022337436676, + "rewards_train/rejected": -0.596221923828125, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -133.37535095214844, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -256.7377624511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.737535238265991, + "rewards_train/margins": 7.536240816116333, + "rewards_train/rejected": -10.273776054382324, + "step": 606 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.4612767696380615, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -12.710067749023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03980982303619385, + "rewards_train/margins": 0.4483166038990021, + "rewards_train/rejected": -0.4085067808628082, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -172.54554748535156, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -210.38462829589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.3045549392700195, + "rewards_train/margins": 1.8339080810546875, + "rewards_train/rejected": -9.138463020324707, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -63.64624786376953, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -63.40340042114258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.039624787867069244, + "rewards_train/margins": -0.024284745566546917, + "rewards_train/rejected": -0.015340042300522327, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -30.639556884765625, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -26.73489761352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48895570635795593, + "rewards_train/margins": 0.6720340549945831, + "rewards_train/rejected": -1.160989761352539, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -80.35026550292969, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -131.76206970214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6850265860557556, + "rewards_train/margins": 2.4411805272102356, + "rewards_train/rejected": -3.126207113265991, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -156.4506072998047, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -200.73904418945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3450608253479004, + "rewards_train/margins": -0.6711564064025879, + "rewards_train/rejected": -1.6739044189453125, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -153.55206298828125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -126.37294006347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7552063465118408, + "rewards_train/margins": 0.7820878028869629, + "rewards_train/rejected": -2.5372941493988037, + "step": 607 + }, + { + "epoch": 0.17, + "logps_train/chosen": -94.59687042236328, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -96.0573501586914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.509687066078186, + "rewards_train/margins": 0.6960479021072388, + "rewards_train/rejected": -2.205734968185425, + "step": 607 + }, + { + "epoch": 0.17, + "learning_rate": 1.8027517476580653e-06, + "loss": 0.3982, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -18.141624450683594, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -20.957660675048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2766624391078949, + "rewards_train/margins": 0.5628536641597748, + "rewards_train/rejected": -0.8395161032676697, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -17.692062377929688, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -25.252239227294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5817062258720398, + "rewards_train/margins": 0.031017720699310303, + "rewards_train/rejected": -0.6127239465713501, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -139.0882568359375, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -157.06231689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.958825707435608, + "rewards_train/margins": 4.147405982017517, + "rewards_train/rejected": -6.106231689453125, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.9870564937591553, + "logps_train/ref_chosen": -1.3984375, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -2.94889235496521, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04113810136914253, + "rewards_train/margins": 0.040714836854021996, + "rewards_train/rejected": 0.0004232645151205361, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -24.55735206604004, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -33.1331672668457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.418235182762146, + "rewards_train/margins": -0.07991838455200195, + "rewards_train/rejected": -1.338316798210144, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -335.7003173828125, + "logps_train/ref_chosen": -233.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -262.24481201171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.270031929016113, + "rewards_train/margins": -1.2455501556396484, + "rewards_train/rejected": -9.024481773376465, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -17.14191436767578, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -68.05081176757812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6829414367675781, + "rewards_train/margins": -0.37786024808883667, + "rewards_train/rejected": -0.30508118867874146, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.501100540161133, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -16.3394832611084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14073505997657776, + "rewards_train/margins": 0.37446328997612, + "rewards_train/rejected": -0.5151983499526978, + "step": 608 + }, + { + "epoch": 0.17, + "logps_train/chosen": -187.0453643798828, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -175.5205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2045364379882812, + "rewards_train/margins": 1.6475143432617188, + "rewards_train/rejected": -4.85205078125, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -59.18205261230469, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -32.1180305480957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5817947387695312, + "rewards_train/margins": 2.4060977697372437, + "rewards_train/rejected": -1.8243030309677124, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.733789443969727, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -23.727394104003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19837895035743713, + "rewards_train/margins": 0.5868604481220245, + "rewards_train/rejected": -0.7852393984794617, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -41.372467041015625, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -16.63712501525879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0372467041015625, + "rewards_train/margins": 0.7827157974243164, + "rewards_train/rejected": -0.8199625015258789, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -151.95721435546875, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -269.170654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.945721387863159, + "rewards_train/margins": 5.671344041824341, + "rewards_train/rejected": -9.6170654296875, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -110.23462677001953, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -117.735595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12653732299804688, + "rewards_train/margins": 1.550096869468689, + "rewards_train/rejected": -1.423559546470642, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -152.54005432128906, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -238.76324462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2540054321289062, + "rewards_train/margins": 6.322319030761719, + "rewards_train/rejected": -8.576324462890625, + "step": 609 + }, + { + "epoch": 0.17, + "logps_train/chosen": -155.479736328125, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -118.21928405761719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.397973537445068, + "rewards_train/margins": -0.576045036315918, + "rewards_train/rejected": -3.8219285011291504, + "step": 609 + }, + { + "epoch": 0.17, + "learning_rate": 1.8011713624460608e-06, + "loss": 0.4863, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -21.50037956237793, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -16.391733169555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6000379920005798, + "rewards_train/margins": 0.3235103487968445, + "rewards_train/rejected": -0.9235483407974243, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -125.78291320800781, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -178.932861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1282913237810135, + "rewards_train/margins": 0.9649948328733444, + "rewards_train/rejected": -1.093286156654358, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -146.370361328125, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -224.50927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.287036180496216, + "rewards_train/margins": 5.363891839981079, + "rewards_train/rejected": -7.650928020477295, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.3062734603881836, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -2.8752031326293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009997653774917126, + "rewards_train/margins": 0.05064296629279852, + "rewards_train/rejected": -0.04064531251788139, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -43.82434844970703, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -48.093788146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057434845715761185, + "rewards_train/margins": 0.02694397047162056, + "rewards_train/rejected": -0.08437881618738174, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -220.22659301757812, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -239.27267456054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7226593494415283, + "rewards_train/margins": 2.9046080112457275, + "rewards_train/rejected": -5.627267360687256, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -67.77326202392578, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -77.12715148925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07267379760742188, + "rewards_train/margins": 1.4103889465332031, + "rewards_train/rejected": -1.3377151489257812, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -162.22805786132812, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -140.90692138671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.272805690765381, + "rewards_train/margins": -0.28211355209350586, + "rewards_train/rejected": -3.990692138671875, + "step": 610 + }, + { + "epoch": 0.17, + "logps_train/chosen": -9.981998443603516, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -31.748910903930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03930015489459038, + "rewards_train/margins": 1.3891912214457989, + "rewards_train/rejected": -1.3498910665512085, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -35.947872161865234, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -1.9609375, + "logps_train/rejected": -7.770934581756592, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8572872281074524, + "rewards_train/margins": -0.2762874960899353, + "rewards_train/rejected": -0.5809997320175171, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -102.99937438964844, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -148.7963104248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3499374389648438, + "rewards_train/margins": 3.3296937942504883, + "rewards_train/rejected": -5.679631233215332, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -144.2120819091797, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -215.5562744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1212081909179688, + "rewards_train/margins": 4.034419536590576, + "rewards_train/rejected": -5.155627727508545, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.952996730804443, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -16.758228302001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12654967606067657, + "rewards_train/margins": 0.38677315413951874, + "rewards_train/rejected": -0.5133228302001953, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -110.63375854492188, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -174.363037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.436624139547348, + "rewards_train/margins": 6.072928041219711, + "rewards_train/rejected": -5.636303901672363, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -68.78997802734375, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -57.65907669067383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.378997802734375, + "rewards_train/margins": -0.6380901336669922, + "rewards_train/rejected": -1.7409076690673828, + "step": 611 + }, + { + "epoch": 0.17, + "logps_train/chosen": -66.6144790649414, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -52.58729553222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.46144792437553406, + "rewards_train/margins": -0.1777183711528778, + "rewards_train/rejected": -0.28372955322265625, + "step": 611 + }, + { + "epoch": 0.17, + "learning_rate": 1.7995853698839535e-06, + "loss": 0.4263, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -158.24508666992188, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -154.59732055664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.024508953094482, + "rewards_train/margins": -0.9147768020629883, + "rewards_train/rejected": -4.109732151031494, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -204.61679077148438, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -187.92388916015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.3616790771484375, + "rewards_train/margins": -0.9692902565002441, + "rewards_train/rejected": -5.392388820648193, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -2.8882598876953125, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -1.78125, + "logps_train/rejected": -4.08448600769043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09507598727941513, + "rewards_train/margins": 0.1352476105093956, + "rewards_train/rejected": -0.23032359778881073, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.948574066162109, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -23.786354064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12923240661621094, + "rewards_train/margins": 0.9994029998779297, + "rewards_train/rejected": -1.1286354064941406, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -114.59861755371094, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -281.702880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.059861898422241, + "rewards_train/margins": 6.410426378250122, + "rewards_train/rejected": -9.470288276672363, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -120.42550659179688, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -205.6895294189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4425506591796875, + "rewards_train/margins": 6.6764020919799805, + "rewards_train/rejected": -9.118952751159668, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -6.140620231628418, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -22.524063110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004687977023422718, + "rewards_train/margins": 0.6070942999795079, + "rewards_train/rejected": -0.6024063229560852, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.0805448442697525, + "logps_train/ref_chosen": -0.3203125, + "logps_train/ref_rejected": -0.3203125, + "logps_train/rejected": -0.07502712309360504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02397676557302475, + "rewards_train/margins": -0.0005517732352018356, + "rewards_train/rejected": 0.024528538808226585, + "step": 612 + }, + { + "epoch": 0.17, + "logps_train/chosen": -185.36764526367188, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -193.4410400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3367645740509033, + "rewards_train/margins": 4.307339429855347, + "rewards_train/rejected": -5.64410400390625, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -2.0928122997283936, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -9.761955261230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14853127300739288, + "rewards_train/margins": 0.343476802110672, + "rewards_train/rejected": -0.1949455291032791, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.88502836227417, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -1.8125, + "logps_train/rejected": -4.753727912902832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19631533324718475, + "rewards_train/margins": 0.09780745208263397, + "rewards_train/rejected": -0.2941227853298187, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -2.691873550415039, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -2.624635696411133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.048874855041503906, + "rewards_train/margins": -0.04891128540111822, + "rewards_train/rejected": 3.643035961431451e-05, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.275716781616211, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -6.029001712799072, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14742831885814667, + "rewards_train/margins": 0.4987659901380539, + "rewards_train/rejected": -0.3513376712799072, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -26.577377319335938, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -22.525047302246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22023773193359375, + "rewards_train/margins": 0.3072670102119446, + "rewards_train/rejected": -0.5275047421455383, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -14.348917007446289, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -13.675747871398926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008858299814164639, + "rewards_train/margins": 0.06393308844417334, + "rewards_train/rejected": -0.0550747886300087, + "step": 613 + }, + { + "epoch": 0.17, + "logps_train/chosen": -146.2375946044922, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -143.05535888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3737595081329346, + "rewards_train/margins": 0.1817765235900879, + "rewards_train/rejected": -3.5555360317230225, + "step": 613 + }, + { + "epoch": 0.17, + "learning_rate": 1.7979937810720105e-06, + "loss": 0.5514, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -180.50918579101562, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -196.31338500976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.100918769836426, + "rewards_train/margins": 0.5804195404052734, + "rewards_train/rejected": -8.6813383102417, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -91.95614624023438, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -87.79170227050781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19561462104320526, + "rewards_train/margins": -0.11644439399242401, + "rewards_train/rejected": -0.07917022705078125, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -98.28258514404297, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -97.98332977294922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7782585024833679, + "rewards_train/margins": -0.029925525188446045, + "rewards_train/rejected": -0.7483329772949219, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.210833549499512, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -16.019065856933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13983336091041565, + "rewards_train/margins": 0.26832324266433716, + "rewards_train/rejected": -0.4081566035747528, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -140.32044982910156, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -147.37156677246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7820451259613037, + "rewards_train/margins": 0.15511155128479004, + "rewards_train/rejected": -3.9371566772460938, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -58.209754943847656, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -70.09513092041016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1040245071053505, + "rewards_train/margins": 1.3135375753045082, + "rewards_train/rejected": -1.2095130681991577, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.691254615783691, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -8.047866821289062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.46287545561790466, + "rewards_train/margins": -0.3455887734889984, + "rewards_train/rejected": -0.11728668212890625, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -17.29772186279297, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -71.62370300292969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5360221862792969, + "rewards_train/margins": -0.5236518858000636, + "rewards_train/rejected": -0.012370300479233265, + "step": 614 + }, + { + "epoch": 0.17, + "logps_train/chosen": -180.36068725585938, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -157.23233032226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2360687255859375, + "rewards_train/margins": 0.187164306640625, + "rewards_train/rejected": -3.4232330322265625, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -6.670753479003906, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -19.875825881958008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16082535684108734, + "rewards_train/margins": 0.7580072432756424, + "rewards_train/rejected": -0.9188326001167297, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -97.94001770019531, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -161.11734008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19400177896022797, + "rewards_train/margins": 3.3177322298288345, + "rewards_train/rejected": -3.5117340087890625, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -109.01646423339844, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -63.009830474853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10164642333984375, + "rewards_train/margins": 0.04933662712574005, + "rewards_train/rejected": -0.1509830504655838, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -47.2530517578125, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -37.25, + "logps_train/rejected": -66.92074584960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.587805151939392, + "rewards_train/margins": 1.3792694807052612, + "rewards_train/rejected": -2.9670746326446533, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -79.29743957519531, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -137.94921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2297439575195312, + "rewards_train/margins": 3.2151780128479004, + "rewards_train/rejected": -4.444921970367432, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.773266792297363, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -6.0589165687561035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05392332002520561, + "rewards_train/margins": 0.18793998286128044, + "rewards_train/rejected": -0.13401666283607483, + "step": 615 + }, + { + "epoch": 0.17, + "logps_train/chosen": -164.62637329101562, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -161.12722778320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.262637615203857, + "rewards_train/margins": 1.15008544921875, + "rewards_train/rejected": -5.412723064422607, + "step": 615 + }, + { + "epoch": 0.17, + "learning_rate": 1.7963966071496656e-06, + "loss": 0.5019, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -142.5965576171875, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -207.59854125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5096558332443237, + "rewards_train/margins": 3.550198197364807, + "rewards_train/rejected": -5.059854030609131, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -202.1572265625, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -176.77655029296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.015722751617432, + "rewards_train/margins": -1.238067626953125, + "rewards_train/rejected": -2.7776551246643066, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -195.90794372558594, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -205.06202697753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.090794563293457, + "rewards_train/margins": 1.7654085159301758, + "rewards_train/rejected": -8.856203079223633, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -13.745344161987305, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -72.50039672851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16828441619873047, + "rewards_train/margins": 2.931755304336548, + "rewards_train/rejected": -3.1000397205352783, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -11.911553382873535, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -41.44498062133789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5036553740501404, + "rewards_train/margins": 0.9283427596092224, + "rewards_train/rejected": -1.4319981336593628, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -149.0583038330078, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -166.69261169433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7058303356170654, + "rewards_train/margins": -0.5365691184997559, + "rewards_train/rejected": -3.1692612171173096, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -95.1513671875, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -144.44400024414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18486328423023224, + "rewards_train/margins": 0.8292633444070816, + "rewards_train/rejected": -0.6444000601768494, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -2.9789061546325684, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -7.316025257110596, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06773438304662704, + "rewards_train/margins": 0.005586907267570496, + "rewards_train/rejected": 0.06214747577905655, + "step": 616 + }, + { + "epoch": 0.17, + "logps_train/chosen": -9.338129997253418, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -13.84652328491211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0338129997253418, + "rewards_train/margins": 0.4195893406867981, + "rewards_train/rejected": -0.4534023404121399, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -48.14150619506836, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -52.75, + "logps_train/rejected": -56.48072814941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13915061950683594, + "rewards_train/margins": 0.23392221331596375, + "rewards_train/rejected": -0.3730728328227997, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -12.069161415100098, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -16.26421546936035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3006661534309387, + "rewards_train/margins": 0.3070054054260254, + "rewards_train/rejected": -0.6076715588569641, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -159.4574737548828, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -204.19775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6457473635673523, + "rewards_train/margins": 2.5740280747413635, + "rewards_train/rejected": -3.219775438308716, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -6.171120643615723, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -20.339900970458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14523707330226898, + "rewards_train/margins": 0.3262530416250229, + "rewards_train/rejected": -0.47149011492729187, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -77.27227783203125, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -77.46298217773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02277221716940403, + "rewards_train/margins": 0.019070434849709272, + "rewards_train/rejected": 0.0037017823196947575, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -10.070539474487305, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -3.578125, + "logps_train/rejected": -5.826403617858887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12580394744873047, + "rewards_train/margins": 0.09902392327785492, + "rewards_train/rejected": -0.2248278707265854, + "step": 617 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.128464698791504, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -6.696148872375488, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2589402198791504, + "rewards_train/margins": -0.29245033487677574, + "rewards_train/rejected": 0.03351011499762535, + "step": 617 + }, + { + "epoch": 0.17, + "learning_rate": 1.7947938592954426e-06, + "loss": 0.5342, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -53.54166793823242, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -78.58097076416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29583320021629333, + "rewards_train/margins": 0.4539302736520767, + "rewards_train/rejected": -0.1580970734357834, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -37.958274841308594, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -65.1990966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029172515496611595, + "rewards_train/margins": 0.049082184210419655, + "rewards_train/rejected": -0.01990966871380806, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -3.722208023071289, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -1.421875, + "logps_train/rejected": -2.2779924869537354, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.036283303052186966, + "rewards_train/margins": 0.04932845011353493, + "rewards_train/rejected": -0.0856117531657219, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -143.2997589111328, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -198.05563354492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7799758911132812, + "rewards_train/margins": 3.525587558746338, + "rewards_train/rejected": -5.305563449859619, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -169.06619262695312, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -150.20925903320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.606619358062744, + "rewards_train/margins": -1.4856934547424316, + "rewards_train/rejected": -2.1209259033203125, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -75.919189453125, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -75.63899230957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008081055246293545, + "rewards_train/margins": -0.028019716031849384, + "rewards_train/rejected": 0.03610077127814293, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -90.6224594116211, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -174.656005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8622459769248962, + "rewards_train/margins": 5.053354799747467, + "rewards_train/rejected": -5.915600776672363, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -88.14498138427734, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -219.54226684570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3144981861114502, + "rewards_train/margins": 6.539728403091431, + "rewards_train/rejected": -7.854226589202881, + "step": 618 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.5442370176315308, + "logps_train/ref_chosen": -0.330078125, + "logps_train/ref_rejected": -0.330078125, + "logps_train/rejected": -0.5518090128898621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021415889263153076, + "rewards_train/margins": 0.0007572006434202194, + "rewards_train/rejected": -0.022173089906573296, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -81.02445220947266, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -185.88925170898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20244522392749786, + "rewards_train/margins": 1.8864798992872238, + "rewards_train/rejected": -2.0889251232147217, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -94.48973083496094, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -204.73818969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2489731311798096, + "rewards_train/margins": 7.074846029281616, + "rewards_train/rejected": -8.323819160461426, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.5855817794799805, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -8.771463394165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19605818390846252, + "rewards_train/margins": 0.03733815252780914, + "rewards_train/rejected": -0.23339633643627167, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -78.38278198242188, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -127.56245422363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.288278192281723, + "rewards_train/margins": 2.367967277765274, + "rewards_train/rejected": -2.656245470046997, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -17.15420150756836, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -23.68975257873535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015420150943100452, + "rewards_train/margins": 0.6535551426932216, + "rewards_train/rejected": -0.668975293636322, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -147.6639862060547, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -170.04905700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.416398763656616, + "rewards_train/margins": 1.5385072231292725, + "rewards_train/rejected": -4.954905986785889, + "step": 619 + }, + { + "epoch": 0.17, + "logps_train/chosen": -4.477010726928711, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -15.814270973205566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1602010726928711, + "rewards_train/margins": 0.1587260365486145, + "rewards_train/rejected": -0.3189271092414856, + "step": 619 + }, + { + "epoch": 0.17, + "learning_rate": 1.7931855487268779e-06, + "loss": 0.4433, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -29.594650268554688, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -31.811979293823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5657150745391846, + "rewards_train/margins": 0.10923290252685547, + "rewards_train/rejected": -1.67494797706604, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -9.540960311889648, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -10.089560508728027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.033403970301151276, + "rewards_train/margins": 0.17986001819372177, + "rewards_train/rejected": -0.1464560478925705, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -40.409423828125, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -6.958502769470215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0784424543380737, + "rewards_train/margins": -0.9700921773910522, + "rewards_train/rejected": -0.10835027694702148, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.85461950302124, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -4.931370735168457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08546195179224014, + "rewards_train/margins": 0.13892512768507004, + "rewards_train/rejected": -0.22438707947731018, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -2.3326504230499268, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -38.45182800292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0020150423515588045, + "rewards_train/margins": 0.3931677758228034, + "rewards_train/rejected": -0.3951828181743622, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -137.36077880859375, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -150.03326416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.786077857017517, + "rewards_train/margins": 2.1172486543655396, + "rewards_train/rejected": -3.9033265113830566, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -99.02538299560547, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -99.19080352783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6525382995605469, + "rewards_train/margins": 0.01654207706451416, + "rewards_train/rejected": -1.669080376625061, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -189.5319061279297, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -140.9712677001953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.953190803527832, + "rewards_train/margins": -1.3560640811920166, + "rewards_train/rejected": -3.5971267223358154, + "step": 620 + }, + { + "epoch": 0.17, + "logps_train/chosen": -30.042245864868164, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -164.48167419433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8792245984077454, + "rewards_train/margins": 4.418943107128143, + "rewards_train/rejected": -5.298167705535889, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -56.60649490356445, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -26.132413864135742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9106494784355164, + "rewards_train/margins": -0.3599081039428711, + "rewards_train/rejected": -0.5507413744926453, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -124.71000671386719, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -163.94488525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9210006594657898, + "rewards_train/margins": 1.973487913608551, + "rewards_train/rejected": -2.894488573074341, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.5565356612205505, + "logps_train/ref_chosen": -1.09375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -14.910106658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.053721435368061066, + "rewards_train/margins": 0.6509820893406868, + "rewards_train/rejected": -0.5972606539726257, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -7.629276275634766, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -1.0546875, + "logps_train/rejected": -2.043154239654541, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06207237392663956, + "rewards_train/margins": 0.16091904789209366, + "rewards_train/rejected": -0.0988466739654541, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -11.108577728271484, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -26.69511604309082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19210778176784515, + "rewards_train/margins": 0.9086538702249527, + "rewards_train/rejected": -1.1007616519927979, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -111.8739013671875, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -108.27607727050781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.687390148639679, + "rewards_train/margins": -0.00978243350982666, + "rewards_train/rejected": -0.6776077151298523, + "step": 621 + }, + { + "epoch": 0.17, + "logps_train/chosen": -136.3077850341797, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -114.63742065429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4307785034179688, + "rewards_train/margins": -0.9170364141464233, + "rewards_train/rejected": -1.5137420892715454, + "step": 621 + }, + { + "epoch": 0.17, + "learning_rate": 1.7915716867004394e-06, + "loss": 0.6515, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -193.14752197265625, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -177.81809997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.714752197265625, + "rewards_train/margins": 0.36705780029296875, + "rewards_train/rejected": -3.0818099975585938, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -237.15936279296875, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -251.31716918945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.115936279296875, + "rewards_train/margins": 0.5157804489135742, + "rewards_train/rejected": -9.63171672821045, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.131784915924072, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -6.617682456970215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15224099159240723, + "rewards_train/margins": 0.0970272570848465, + "rewards_train/rejected": -0.24926824867725372, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -21.753982543945312, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -28.721960067749023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6503982543945312, + "rewards_train/margins": 0.6217978000640869, + "rewards_train/rejected": -1.2721960544586182, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -15.202252388000488, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -8.963123321533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2202252447605133, + "rewards_train/margins": 0.07296210527420044, + "rewards_train/rejected": -0.29318735003471375, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -15.618856430053711, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -8.766866683959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3993856608867645, + "rewards_train/margins": 0.02417600154876709, + "rewards_train/rejected": -0.4235616624355316, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -8.090642929077148, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -8.683442115783691, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05593929439783096, + "rewards_train/margins": 0.4030299112200737, + "rewards_train/rejected": -0.45896920561790466, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -97.48766326904297, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -209.37457275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3487663269042969, + "rewards_train/margins": 8.588690757751465, + "rewards_train/rejected": -9.937457084655762, + "step": 622 + }, + { + "epoch": 0.17, + "logps_train/chosen": -21.923015594482422, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -21.047752380371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029801560565829277, + "rewards_train/margins": 0.07497368194162846, + "rewards_train/rejected": -0.10477524250745773, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -91.11483764648438, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -124.24615478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9614837765693665, + "rewards_train/margins": 2.0631317496299744, + "rewards_train/rejected": -3.024615526199341, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -98.61476135253906, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -107.83383178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18852387368679047, + "rewards_train/margins": 1.0219070762395859, + "rewards_train/rejected": -0.8333832025527954, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -152.28585815429688, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -159.85440063476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2285858392715454, + "rewards_train/margins": 2.6068543195724487, + "rewards_train/rejected": -3.835440158843994, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -12.362157821655273, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -9.941513061523438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.37996578216552734, + "rewards_train/margins": -0.1170644760131836, + "rewards_train/rejected": -0.26290130615234375, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.145709037780762, + "logps_train/ref_chosen": -3.921875, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -22.27702522277832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12238340824842453, + "rewards_train/margins": 0.5365691259503365, + "rewards_train/rejected": -0.658952534198761, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -139.53363037109375, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -148.5306854248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5533630847930908, + "rewards_train/margins": 2.2497055530548096, + "rewards_train/rejected": -3.8030686378479004, + "step": 623 + }, + { + "epoch": 0.17, + "logps_train/chosen": -140.58755493164062, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -176.13693237304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.55875563621521, + "rewards_train/margins": 0.5549376010894775, + "rewards_train/rejected": -3.1136932373046875, + "step": 623 + }, + { + "epoch": 0.17, + "learning_rate": 1.789952284511451e-06, + "loss": 0.4276, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -28.48980712890625, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -24.138086318969727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.011480689048767, + "rewards_train/margins": 0.9117029905319214, + "rewards_train/rejected": -1.9231836795806885, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -0.5895859599113464, + "logps_train/ref_chosen": -0.98828125, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -6.113492012023926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0398695282638073, + "rewards_train/margins": 0.16059372946619987, + "rewards_train/rejected": -0.12072420120239258, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -15.1094970703125, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -14.992876052856445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5296997427940369, + "rewards_train/margins": 0.18833786249160767, + "rewards_train/rejected": -0.7180376052856445, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -116.880859375, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -139.3267822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2880859375, + "rewards_train/margins": 0.44459235668182373, + "rewards_train/rejected": -1.7326782941818237, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -113.76298522949219, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -130.1846466064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12629853188991547, + "rewards_train/margins": 0.5921661406755447, + "rewards_train/rejected": -0.7184646725654602, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -89.48918914794922, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -110.1330795288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14891891181468964, + "rewards_train/margins": 3.4143890887498856, + "rewards_train/rejected": -3.563308000564575, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -161.4512176513672, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -188.3759307861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.445121765136719, + "rewards_train/margins": 1.0924715995788574, + "rewards_train/rejected": -6.537593364715576, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.100756645202637, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -18.674057006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18351316452026367, + "rewards_train/margins": 0.008892536163330078, + "rewards_train/rejected": -0.19240570068359375, + "step": 624 + }, + { + "epoch": 0.17, + "logps_train/chosen": -5.181901931762695, + "logps_train/ref_chosen": -3.921875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -11.55821704864502, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.126002699136734, + "rewards_train/margins": 0.611068993806839, + "rewards_train/rejected": -0.737071692943573, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -41.38706970214844, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -25.051712036132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3637069463729858, + "rewards_train/margins": -0.5210357308387756, + "rewards_train/rejected": -0.8426712155342102, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -16.537940979003906, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -6.430255889892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0975440964102745, + "rewards_train/margins": 0.16423148661851883, + "rewards_train/rejected": -0.26177558302879333, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -8.24299144744873, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -11.457210540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006950855255126953, + "rewards_train/margins": 0.26517191529273987, + "rewards_train/rejected": -0.2582210600376129, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -29.10364532470703, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -102.41329956054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3103646039962769, + "rewards_train/margins": -0.16903460025787354, + "rewards_train/rejected": -1.1413300037384033, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -87.91806030273438, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -28.74688720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10819397121667862, + "rewards_train/margins": 1.3703827634453773, + "rewards_train/rejected": -1.2621887922286987, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -19.869495391845703, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -1.203125, + "logps_train/rejected": -4.539243221282959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04944954067468643, + "rewards_train/margins": 0.284162275493145, + "rewards_train/rejected": -0.3336118161678314, + "step": 625 + }, + { + "epoch": 0.17, + "logps_train/chosen": -23.386978149414062, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -38.184139251708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5761978030204773, + "rewards_train/margins": 0.45471614599227905, + "rewards_train/rejected": -1.0309139490127563, + "step": 625 + }, + { + "epoch": 0.17, + "learning_rate": 1.7883273534940107e-06, + "loss": 0.5105, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -25.55063247680664, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -25.080341339111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.605063259601593, + "rewards_train/margins": 0.5779709219932556, + "rewards_train/rejected": -1.1830341815948486, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -241.10372924804688, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -246.5931396484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.610373020172119, + "rewards_train/margins": 1.4489407539367676, + "rewards_train/rejected": -8.059313774108887, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -60.41446304321289, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -60.16815948486328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04144630581140518, + "rewards_train/margins": -0.024630356580018997, + "rewards_train/rejected": -0.016815949231386185, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -98.18121337890625, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -211.53073120117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.668121337890625, + "rewards_train/margins": 7.034952163696289, + "rewards_train/rejected": -8.703073501586914, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -15.584268569946289, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -17.644073486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11467685550451279, + "rewards_train/margins": 0.6872305050492287, + "rewards_train/rejected": -0.8019073605537415, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -79.34815979003906, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -49.265045166015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3848159909248352, + "rewards_train/margins": -0.7083114683628082, + "rewards_train/rejected": 0.323495477437973, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -250.33937072753906, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -208.34779357910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.333937168121338, + "rewards_train/margins": 0.6008424758911133, + "rewards_train/rejected": -7.934779644012451, + "step": 626 + }, + { + "epoch": 0.17, + "logps_train/chosen": -16.286020278930664, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -16.453022003173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42235204577445984, + "rewards_train/margins": 0.016700148582458496, + "rewards_train/rejected": -0.43905219435691833, + "step": 626 + }, + { + "epoch": 0.18, + "logps_train/chosen": -57.43105697631836, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -37.452945709228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41810569167137146, + "rewards_train/margins": 0.0021888911724090576, + "rewards_train/rejected": -0.4202945828437805, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -104.76396942138672, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -98.16110229492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47639694809913635, + "rewards_train/margins": 0.7897133529186249, + "rewards_train/rejected": -1.2661103010177612, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -219.9297637939453, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -211.38174438476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.192976474761963, + "rewards_train/margins": 0.24519824981689453, + "rewards_train/rejected": -6.438174724578857, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -16.700456619262695, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -21.986644744873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16379566490650177, + "rewards_train/margins": -0.2401311919093132, + "rewards_train/rejected": 0.07633552700281143, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -16.119483947753906, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -29.187002182006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43694838881492615, + "rewards_train/margins": 1.3942518532276154, + "rewards_train/rejected": -1.8312002420425415, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -194.7750244140625, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -219.0, + "logps_train/rejected": -263.0051574707031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.177502632141113, + "rewards_train/margins": -0.7769865989685059, + "rewards_train/rejected": -4.400516033172607, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -91.7772216796875, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -103.93119049072266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.22772216796875, + "rewards_train/margins": -0.08460307121276855, + "rewards_train/rejected": -1.1431190967559814, + "step": 627 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.158071517944336, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -26.814926147460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1564321517944336, + "rewards_train/margins": 6.046891212463379e-05, + "rewards_train/rejected": -0.15649262070655823, + "step": 627 + }, + { + "epoch": 0.18, + "learning_rate": 1.7866969050209125e-06, + "loss": 0.5794, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -83.78672790527344, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -65.67073059082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07132720947265625, + "rewards_train/margins": 0.4384002685546875, + "rewards_train/rejected": -0.36707305908203125, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -71.73905944824219, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -96.79296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32390594482421875, + "rewards_train/margins": 1.005391001701355, + "rewards_train/rejected": -1.3292969465255737, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -7.643394470214844, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -10.053162574768066, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4721519649028778, + "rewards_train/margins": -0.3230857104063034, + "rewards_train/rejected": -0.1490662544965744, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -121.17414855957031, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -121.2342529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06741485744714737, + "rewards_train/margins": 0.8560104593634605, + "rewards_train/rejected": -0.9234253168106079, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -7.646186351776123, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -10.667072296142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0927436351776123, + "rewards_train/margins": 0.1864635944366455, + "rewards_train/rejected": -0.2792072296142578, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -149.1701202392578, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -199.1636962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2170121669769287, + "rewards_train/margins": 4.149357557296753, + "rewards_train/rejected": -7.366369724273682, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -10.897956848144531, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -8.621274948120117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2022956907749176, + "rewards_train/margins": 0.505144327878952, + "rewards_train/rejected": -0.7074400186538696, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.341506242752075, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -4.696491241455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05803687497973442, + "rewards_train/margins": 0.09956099838018417, + "rewards_train/rejected": -0.04152412340044975, + "step": 628 + }, + { + "epoch": 0.18, + "logps_train/chosen": -3.519535541534424, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -19.048078536987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002733945846557617, + "rewards_train/margins": 0.4637918174266815, + "rewards_train/rejected": -0.4610578715801239, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -105.34097290039062, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -128.55715942382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2340972423553467, + "rewards_train/margins": -1.678381323814392, + "rewards_train/rejected": -1.5557159185409546, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -3.8125083446502686, + "logps_train/ref_chosen": -1.796875, + "logps_train/ref_rejected": -1.796875, + "logps_train/rejected": -3.9560136795043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20156334340572357, + "rewards_train/margins": 0.014350533485412598, + "rewards_train/rejected": -0.21591387689113617, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -62.03118896484375, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -32.540557861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17188110947608948, + "rewards_train/margins": 2.394686847925186, + "rewards_train/rejected": -2.2228057384490967, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.07361438125371933, + "logps_train/ref_chosen": -0.2080078125, + "logps_train/ref_rejected": -0.2080078125, + "logps_train/rejected": -0.07496398687362671, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013439342379570007, + "rewards_train/margins": 0.00013495981693267822, + "rewards_train/rejected": 0.013304382562637329, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -121.63838195800781, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -96.01668548583984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3138382136821747, + "rewards_train/margins": -0.4121696650981903, + "rewards_train/rejected": 0.09833145141601562, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -164.6876220703125, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -161.34176635742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.768762230873108, + "rewards_train/margins": -0.8345856070518494, + "rewards_train/rejected": -0.9341766238212585, + "step": 629 + }, + { + "epoch": 0.18, + "logps_train/chosen": -130.12252807617188, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -79.03388977050781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5122528076171875, + "rewards_train/margins": -1.1088638305664062, + "rewards_train/rejected": -0.40338897705078125, + "step": 629 + }, + { + "epoch": 0.18, + "learning_rate": 1.7850609505035677e-06, + "loss": 0.6925, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -104.97712707519531, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -116.12601470947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.947712779045105, + "rewards_train/margins": 2.064888596534729, + "rewards_train/rejected": -4.012601375579834, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -136.18701171875, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -99.35454559326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6187012195587158, + "rewards_train/margins": -0.23324668407440186, + "rewards_train/rejected": -1.385454535484314, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -4.938679218292236, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -22.371837615966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04074292257428169, + "rewards_train/margins": 0.058940839022397995, + "rewards_train/rejected": -0.09968376159667969, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -14.76949691772461, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -23.217622756958008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39569970965385437, + "rewards_train/margins": 1.0448125898838043, + "rewards_train/rejected": -1.4405122995376587, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -11.447372436523438, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -29.427339553833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.219737246632576, + "rewards_train/margins": 0.26049672067165375, + "rewards_train/rejected": -0.48023396730422974, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.06408929824829102, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -0.75390625, + "logps_train/rejected": -6.565090656280518, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09202857315540314, + "rewards_train/margins": 0.6731470376253128, + "rewards_train/rejected": -0.5811184644699097, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.5519237518310547, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -6.800380229949951, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08878612518310547, + "rewards_train/margins": 0.16312691569328308, + "rewards_train/rejected": -0.25191304087638855, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -73.5938720703125, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -177.86302185058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.034387230873108, + "rewards_train/margins": 0.9519150257110596, + "rewards_train/rejected": -1.9863022565841675, + "step": 630 + }, + { + "epoch": 0.18, + "logps_train/chosen": -49.64802932739258, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -21.40365219116211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3101970851421356, + "rewards_train/margins": 0.688062310218811, + "rewards_train/rejected": -0.3778652250766754, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -11.669177055358887, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -8.828560829162598, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6419177055358887, + "rewards_train/margins": -0.3684366047382355, + "rewards_train/rejected": -0.2734811007976532, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -7.668283462524414, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -10.678709030151367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3965158462524414, + "rewards_train/margins": -0.03489494323730469, + "rewards_train/rejected": -0.3616209030151367, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -223.62130737304688, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -174.84747314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.362130641937256, + "rewards_train/margins": -0.6273832321166992, + "rewards_train/rejected": -4.734747409820557, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -13.150426864624023, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -19.17776107788086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11504268646240234, + "rewards_train/margins": 0.10273341834545135, + "rewards_train/rejected": -0.2177761048078537, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -36.78252410888672, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -70.83357238769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.378252387046814, + "rewards_train/margins": 1.9051049947738647, + "rewards_train/rejected": -3.2833573818206787, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.407531261444092, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -11.28777027130127, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3279968798160553, + "rewards_train/margins": 0.20677390694618225, + "rewards_train/rejected": 0.12122297286987305, + "step": 631 + }, + { + "epoch": 0.18, + "logps_train/chosen": -12.499096870422363, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -45.710487365722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23115968704223633, + "rewards_train/margins": 1.764889121055603, + "rewards_train/rejected": -1.9960488080978394, + "step": 631 + }, + { + "epoch": 0.18, + "learning_rate": 1.7834195013919228e-06, + "loss": 0.5276, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -16.000957489013672, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -13.968475341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.47490426898002625, + "rewards_train/margins": 0.7280018031597137, + "rewards_train/rejected": -0.2530975341796875, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -11.00436782836914, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -18.275802612304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08793678134679794, + "rewards_train/margins": 0.9396434798836708, + "rewards_train/rejected": -1.0275802612304688, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -207.32699584960938, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -166.40316772460938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0326995849609375, + "rewards_train/margins": -0.2923828363418579, + "rewards_train/rejected": -1.7403167486190796, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -57.508872985839844, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -56.15787887573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25088730454444885, + "rewards_train/margins": 1.3649005591869354, + "rewards_train/rejected": -1.6157878637313843, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -137.22628784179688, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -120.77511596679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.172628879547119, + "rewards_train/margins": 0.6048827171325684, + "rewards_train/rejected": -2.7775115966796875, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -183.99871826171875, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -259.93084716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.199871778488159, + "rewards_train/margins": 5.993212938308716, + "rewards_train/rejected": -8.193084716796875, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -107.82779693603516, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -108.05644989013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6327797174453735, + "rewards_train/margins": 0.12286525964736938, + "rewards_train/rejected": -0.7556449770927429, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -26.137832641601562, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -45.75, + "logps_train/rejected": -59.370750427246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6887832880020142, + "rewards_train/margins": 0.673291802406311, + "rewards_train/rejected": -1.3620750904083252, + "step": 632 + }, + { + "epoch": 0.18, + "logps_train/chosen": -34.84645080566406, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -31.5, + "logps_train/rejected": -38.612335205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7096450924873352, + "rewards_train/margins": 0.0015884637832641602, + "rewards_train/rejected": -0.7112335562705994, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -143.55828857421875, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -146.2611541748047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6058290004730225, + "rewards_train/margins": -0.22971343994140625, + "rewards_train/rejected": -3.376115560531616, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -138.6924591064453, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -183.8905029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.419245958328247, + "rewards_train/margins": 1.7698044776916504, + "rewards_train/rejected": -3.1890504360198975, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -105.07273864746094, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -105.61778259277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5572738647460938, + "rewards_train/margins": 0.05450439453125, + "rewards_train/rejected": -0.6117782592773438, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -45.635536193847656, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -8.145418167114258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5885536074638367, + "rewards_train/margins": -0.25526177883148193, + "rewards_train/rejected": -0.33329182863235474, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -75.03197479248047, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -73.2497329711914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7031974792480469, + "rewards_train/margins": 0.5717757940292358, + "rewards_train/rejected": -1.2749732732772827, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -24.577102661132812, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -43.163570404052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3577102720737457, + "rewards_train/margins": 0.38364675641059875, + "rewards_train/rejected": -0.7413570284843445, + "step": 633 + }, + { + "epoch": 0.18, + "logps_train/chosen": -5.656406402587891, + "logps_train/ref_chosen": -0.05126953125, + "logps_train/ref_rejected": -0.05126953125, + "logps_train/rejected": -5.777818202972412, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5605136752128601, + "rewards_train/margins": 0.012141227722167969, + "rewards_train/rejected": -0.5726549029350281, + "step": 633 + }, + { + "epoch": 0.18, + "learning_rate": 1.7817725691743818e-06, + "loss": 0.506, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -38.64884567260742, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -74.21966552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28511545062065125, + "rewards_train/margins": 2.982082098722458, + "rewards_train/rejected": -2.6969666481018066, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -155.53330993652344, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -182.67514038085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.553331136703491, + "rewards_train/margins": 1.114182949066162, + "rewards_train/rejected": -3.6675140857696533, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -23.972917556762695, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -11.085705757141113, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24729175865650177, + "rewards_train/margins": -0.02622118592262268, + "rewards_train/rejected": -0.2210705727338791, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -119.68462371826172, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -130.9427490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.568462371826172, + "rewards_train/margins": 0.7258126735687256, + "rewards_train/rejected": -3.2942750453948975, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -119.27019500732422, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -175.79803466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4770195186138153, + "rewards_train/margins": 6.302784234285355, + "rewards_train/rejected": -6.77980375289917, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -83.54695892333984, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -74.7007827758789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004695892333984375, + "rewards_train/margins": 1.34038245677948, + "rewards_train/rejected": -1.3450783491134644, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -80.021240234375, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -79.93708038330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3021240234375, + "rewards_train/margins": -0.008415967226028442, + "rewards_train/rejected": -0.29370805621147156, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -26.986541748046875, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -15.424560546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2611541748046875, + "rewards_train/margins": -0.35619812458753586, + "rewards_train/rejected": 0.09504394978284836, + "step": 634 + }, + { + "epoch": 0.18, + "logps_train/chosen": -5.6949143409729, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -84.47088623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3413664400577545, + "rewards_train/margins": 1.5557222068309784, + "rewards_train/rejected": -1.897088646888733, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -10.908841133117676, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -8.822978973388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02161588706076145, + "rewards_train/margins": 0.04141378402709961, + "rewards_train/rejected": -0.019797896966338158, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -75.04287719726562, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -100.72500610351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7542877197265625, + "rewards_train/margins": 2.2682130336761475, + "rewards_train/rejected": -3.02250075340271, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -69.35508728027344, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -125.40776062011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.135508731007576, + "rewards_train/margins": 4.355267331004143, + "rewards_train/rejected": -4.490776062011719, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -139.3993377685547, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -191.283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6399338245391846, + "rewards_train/margins": 4.038386583328247, + "rewards_train/rejected": -6.678320407867432, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -138.33685302734375, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -230.0, + "logps_train/rejected": -237.0035858154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5336853265762329, + "rewards_train/margins": 0.16667324304580688, + "rewards_train/rejected": -0.7003585696220398, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.06533662974834442, + "logps_train/ref_chosen": -0.1650390625, + "logps_train/ref_rejected": -0.1650390625, + "logps_train/rejected": -0.06530682742595673, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.009970243088901043, + "rewards_train/margins": -2.9802322387695312e-06, + "rewards_train/rejected": 0.009973223321139812, + "step": 635 + }, + { + "epoch": 0.18, + "logps_train/chosen": -9.291378021240234, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -23.560300827026367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05413780361413956, + "rewards_train/margins": 0.7831422910094261, + "rewards_train/rejected": -0.8372800946235657, + "step": 635 + }, + { + "epoch": 0.18, + "learning_rate": 1.7801201653777239e-06, + "loss": 0.3706, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -23.022239685058594, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -28.154590606689453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5147239565849304, + "rewards_train/margins": -0.011764883995056152, + "rewards_train/rejected": -0.5029590725898743, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -1.2376888990402222, + "logps_train/ref_chosen": -0.609375, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -2.555446147918701, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06283139437437057, + "rewards_train/margins": -0.12447427958250046, + "rewards_train/rejected": 0.06164288520812988, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -213.80844116210938, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -214.05210876464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.080844402313232, + "rewards_train/margins": -0.07563352584838867, + "rewards_train/rejected": -6.005210876464844, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.49924230575561523, + "logps_train/ref_chosen": -0.62109375, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -3.1461281776428223, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012185144238173962, + "rewards_train/margins": 0.07992296200245619, + "rewards_train/rejected": -0.06773781776428223, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -159.84136962890625, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -203.85049438476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9841370582580566, + "rewards_train/margins": 3.800912380218506, + "rewards_train/rejected": -6.7850494384765625, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -207.85150146484375, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -216.68893432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.38515043258667, + "rewards_train/margins": 0.7837433815002441, + "rewards_train/rejected": -8.168893814086914, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -100.89381408691406, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -101.1035385131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2606185972690582, + "rewards_train/margins": 0.020972445607185364, + "rewards_train/rejected": 0.23964615166187286, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -111.39418029785156, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -39.04581069946289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11058197170495987, + "rewards_train/margins": 0.9901630654931068, + "rewards_train/rejected": -0.879581093788147, + "step": 636 + }, + { + "epoch": 0.18, + "logps_train/chosen": -9.538238525390625, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -14.731043815612793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05867614969611168, + "rewards_train/margins": 1.258343007415533, + "rewards_train/rejected": -1.1996668577194214, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -121.27439880371094, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -242.72801208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6774398684501648, + "rewards_train/margins": 6.295361340045929, + "rewards_train/rejected": -6.972801208496094, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -153.90870666503906, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -204.62661743164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.390870571136475, + "rewards_train/margins": -0.2282085418701172, + "rewards_train/rejected": -5.162662029266357, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -14.305480003356934, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -15.618734359741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3819520175457001, + "rewards_train/margins": 1.1813254654407501, + "rewards_train/rejected": -0.79937344789505, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -36.76642608642578, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -74.09341430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0266426801681519, + "rewards_train/margins": 1.6826988458633423, + "rewards_train/rejected": -2.709341526031494, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -12.955639839172363, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -27.064002990722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10806398838758469, + "rewards_train/margins": 0.035836316645145416, + "rewards_train/rejected": -0.1439003050327301, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -5.813917636871338, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -5.645473480224609, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22201676666736603, + "rewards_train/margins": -0.11059441417455673, + "rewards_train/rejected": -0.1114223524928093, + "step": 637 + }, + { + "epoch": 0.18, + "logps_train/chosen": -126.34477996826172, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -106.79904174804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1844780445098877, + "rewards_train/margins": 0.045426130294799805, + "rewards_train/rejected": -2.2299041748046875, + "step": 637 + }, + { + "epoch": 0.18, + "learning_rate": 1.7784623015670235e-06, + "loss": 0.4904, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.610786437988281, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -12.95557975769043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18267135322093964, + "rewards_train/margins": 0.5407293289899826, + "rewards_train/rejected": -0.35805797576904297, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -183.0351104736328, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -199.7716522216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.00351095199585, + "rewards_train/margins": 0.8736543655395508, + "rewards_train/rejected": -6.8771653175354, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -270.8162841796875, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -208.0, + "logps_train/rejected": -307.1225280761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.281628608703613, + "rewards_train/margins": 1.630624771118164, + "rewards_train/rejected": -9.912253379821777, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -156.6782989501953, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -187.41197204589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.167829990386963, + "rewards_train/margins": 5.823367595672607, + "rewards_train/rejected": -8.99119758605957, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -62.03439712524414, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -173.24549865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021560287103056908, + "rewards_train/margins": 9.646110152825713, + "rewards_train/rejected": -9.624549865722656, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -12.13166618347168, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -15.764898300170898, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.560041606426239, + "rewards_train/margins": 0.19769823551177979, + "rewards_train/rejected": -0.7577398419380188, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -82.1474380493164, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -99.93743896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7352561950683594, + "rewards_train/margins": 1.8290001153945923, + "rewards_train/rejected": -1.093743920326233, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -25.09590721130371, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -3.921875, + "logps_train/rejected": -5.490266799926758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08459072560071945, + "rewards_train/margins": 0.07224845141172409, + "rewards_train/rejected": -0.15683917701244354, + "step": 638 + }, + { + "epoch": 0.18, + "logps_train/chosen": -24.82754898071289, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -17.44369888305664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.570254921913147, + "rewards_train/margins": -0.23838502168655396, + "rewards_train/rejected": -0.331869900226593, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.995693683624268, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -23.188404083251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3839443624019623, + "rewards_train/margins": 0.6536460220813751, + "rewards_train/rejected": -1.0375903844833374, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -90.45641326904297, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -173.20693969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.045641303062439, + "rewards_train/margins": 6.2250529527664185, + "rewards_train/rejected": -7.270694255828857, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -167.64439392089844, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -36.703224182128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.064439535140991, + "rewards_train/margins": -0.44411706924438477, + "rewards_train/rejected": -1.6203224658966064, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -4.070456027984619, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -5.551232814788818, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.052358102053403854, + "rewards_train/margins": -0.02848481945693493, + "rewards_train/rejected": -0.023873282596468925, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -35.04150390625, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -12.769174575805664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.129150390625, + "rewards_train/margins": -0.058482930064201355, + "rewards_train/rejected": -0.07066746056079865, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.020496368408203, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -33.24225616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13642464578151703, + "rewards_train/margins": 1.1628010421991348, + "rewards_train/rejected": -1.2992256879806519, + "step": 639 + }, + { + "epoch": 0.18, + "logps_train/chosen": -202.98876953125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -213.34378051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.798877239227295, + "rewards_train/margins": 1.4355006217956543, + "rewards_train/rejected": -8.23437786102295, + "step": 639 + }, + { + "epoch": 0.18, + "learning_rate": 1.7767989893455694e-06, + "loss": 0.4053, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -133.31185913085938, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -165.013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4311859607696533, + "rewards_train/margins": 3.6701815128326416, + "rewards_train/rejected": -6.101367473602295, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -86.09870910644531, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -62.87554168701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7598708868026733, + "rewards_train/margins": -0.8723167181015015, + "rewards_train/rejected": -0.8875541687011719, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.8307104110717773, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -4.138213634490967, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.055991459637880325, + "rewards_train/margins": 0.22450033202767372, + "rewards_train/rejected": -0.1685088723897934, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -24.283369064331055, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -26.37619400024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6658369302749634, + "rewards_train/margins": 0.27178245782852173, + "rewards_train/rejected": -0.9376193881034851, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -9.327808380126953, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -14.574113845825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2327808439731598, + "rewards_train/margins": 0.2621305584907532, + "rewards_train/rejected": -0.49491140246391296, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -28.321735382080078, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -18.86318588256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04282646253705025, + "rewards_train/margins": 0.9791450388729572, + "rewards_train/rejected": -0.936318576335907, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -162.1417236328125, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -161.6492919921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.614172458648682, + "rewards_train/margins": -0.5492429733276367, + "rewards_train/rejected": -4.064929485321045, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -82.95278930664062, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -64.61911010742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.295278936624527, + "rewards_train/margins": -0.28336792532354593, + "rewards_train/rejected": -0.011911011300981045, + "step": 640 + }, + { + "epoch": 0.18, + "logps_train/chosen": -19.968839645385742, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -48.57123565673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08438396453857422, + "rewards_train/margins": 2.0477397441864014, + "rewards_train/rejected": -2.1321237087249756, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -3.438791513442993, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -12.37914752960205, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020441651344299316, + "rewards_train/margins": 0.6362231373786926, + "rewards_train/rejected": -0.6566647887229919, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.0964691638946533, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -0.78515625, + "logps_train/rejected": -0.558476448059082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07629058510065079, + "rewards_train/margins": 0.05362260527908802, + "rewards_train/rejected": 0.022667979821562767, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -20.462804794311523, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -35.8465576171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7400304675102234, + "rewards_train/margins": -0.08037471771240234, + "rewards_train/rejected": -0.659655749797821, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -179.39923095703125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -175.39346313476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.33992338180542, + "rewards_train/margins": 2.2994232177734375, + "rewards_train/rejected": -6.639346599578857, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -17.424728393554688, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -21.716289520263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6549728512763977, + "rewards_train/margins": 0.029156088829040527, + "rewards_train/rejected": -0.6841289401054382, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -27.20966148376465, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -29.76812744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1584662199020386, + "rewards_train/margins": 0.030846595764160156, + "rewards_train/rejected": -1.1893128156661987, + "step": 641 + }, + { + "epoch": 0.18, + "logps_train/chosen": -70.15380859375, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -24.089092254638672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.065380811691284, + "rewards_train/margins": -0.8252215385437012, + "rewards_train/rejected": -1.240159273147583, + "step": 641 + }, + { + "epoch": 0.18, + "learning_rate": 1.7751302403547844e-06, + "loss": 0.6084, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -84.52969360351562, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -80.22105407714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1529693603515625, + "rewards_train/margins": 0.3191360533237457, + "rewards_train/rejected": -0.4721054136753082, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -122.45002746582031, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -132.4154510498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8450028896331787, + "rewards_train/margins": 1.196542501449585, + "rewards_train/rejected": -5.041545391082764, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -115.35231018066406, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -162.37957763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1852309703826904, + "rewards_train/margins": 3.2027270793914795, + "rewards_train/rejected": -6.38795804977417, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -14.525362968444824, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -16.21766471862793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35878631472587585, + "rewards_train/margins": 0.544230192899704, + "rewards_train/rejected": -0.9030165076255798, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -78.7357177734375, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -103.73314666748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.52642822265625, + "rewards_train/margins": 2.249742865562439, + "rewards_train/rejected": -1.723314642906189, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -121.20907592773438, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -78.59774780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7790924310684204, + "rewards_train/margins": 1.188867211341858, + "rewards_train/rejected": -0.4097747802734375, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -9.558161735534668, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -21.15264320373535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4683161675930023, + "rewards_train/margins": -0.4280518479645252, + "rewards_train/rejected": -0.0402643196284771, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -195.11459350585938, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -115.41210174560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2114593982696533, + "rewards_train/margins": 0.2297508716583252, + "rewards_train/rejected": -2.4412102699279785, + "step": 642 + }, + { + "epoch": 0.18, + "logps_train/chosen": -10.222672462463379, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -36.055641174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2160172462463379, + "rewards_train/margins": 1.5020469427108765, + "rewards_train/rejected": -1.7180641889572144, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -5.682599067687988, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -13.839838981628418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06825990974903107, + "rewards_train/margins": 0.05322398990392685, + "rewards_train/rejected": -0.12148389965295792, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -88.4303970336914, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -89.266845703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1930397003889084, + "rewards_train/margins": -0.4663551300764084, + "rewards_train/rejected": 0.2733154296875, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -159.11964416503906, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -126.37387084960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.511964321136475, + "rewards_train/margins": -1.374577283859253, + "rewards_train/rejected": -3.1373870372772217, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -48.178924560546875, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -41.90590286254883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1678924560546875, + "rewards_train/margins": -0.3773021697998047, + "rewards_train/rejected": 0.2094097137451172, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -196.40330505371094, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -179.7022705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.440330505371094, + "rewards_train/margins": 1.929896354675293, + "rewards_train/rejected": -8.370226860046387, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -17.27259063720703, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -61.79484558105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20850907266139984, + "rewards_train/margins": 1.945975437760353, + "rewards_train/rejected": -2.154484510421753, + "step": 643 + }, + { + "epoch": 0.18, + "logps_train/chosen": -188.11773681640625, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -165.59112548828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.511773586273193, + "rewards_train/margins": -2.2526607513427734, + "rewards_train/rejected": -4.25911283493042, + "step": 643 + }, + { + "epoch": 0.18, + "learning_rate": 1.7734560662741415e-06, + "loss": 0.6331, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -40.204627990722656, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -52.620643615722656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6204627752304077, + "rewards_train/margins": -1.583398412913084, + "rewards_train/rejected": -0.037064362317323685, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -156.78671264648438, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -167.65447998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9786712527275085, + "rewards_train/margins": 0.48677676916122437, + "rewards_train/rejected": -1.465448021888733, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -79.70543670654297, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -250.7483367919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8705437183380127, + "rewards_train/margins": 9.20429015159607, + "rewards_train/rejected": -11.074833869934082, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -206.5814971923828, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -203.74844360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.058149814605713, + "rewards_train/margins": 2.4166946411132812, + "rewards_train/rejected": -6.474844455718994, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -29.681976318359375, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -35.6374626159668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49319764971733093, + "rewards_train/margins": 1.5330486595630646, + "rewards_train/rejected": -2.0262463092803955, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -111.43919372558594, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -97.24043273925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24391937255859375, + "rewards_train/margins": 0.4301239252090454, + "rewards_train/rejected": -0.6740432977676392, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.89801287651062, + "logps_train/ref_chosen": -1.140625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -24.6046085357666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17573879659175873, + "rewards_train/margins": 0.28472207486629486, + "rewards_train/rejected": -0.4604608714580536, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -184.6150665283203, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -194.75228881835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9615066051483154, + "rewards_train/margins": 5.163722276687622, + "rewards_train/rejected": -8.125228881835938, + "step": 644 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.749908924102783, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -24.66925621032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1218658909201622, + "rewards_train/margins": 0.09505973011255264, + "rewards_train/rejected": -0.21692562103271484, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -56.95611572265625, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -127.28627014160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.595611572265625, + "rewards_train/margins": 2.783015489578247, + "rewards_train/rejected": -3.378627061843872, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -93.33863830566406, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -115.677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8338638544082642, + "rewards_train/margins": 0.03390955924987793, + "rewards_train/rejected": -1.867773413658142, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -147.0773162841797, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -147.0200958251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.257731914520264, + "rewards_train/margins": 1.4942779541015625, + "rewards_train/rejected": -5.752009868621826, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -105.98313903808594, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -34.38650131225586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2483139038085938, + "rewards_train/margins": -1.109663724899292, + "rewards_train/rejected": -1.1386501789093018, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -203.54483032226562, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -149.6949462890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.354483127593994, + "rewards_train/margins": -1.1849884986877441, + "rewards_train/rejected": -5.16949462890625, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -11.829780578613281, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -10.219165802001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6236030459403992, + "rewards_train/margins": 0.13581353425979614, + "rewards_train/rejected": -0.7594165802001953, + "step": 645 + }, + { + "epoch": 0.18, + "logps_train/chosen": -102.49362182617188, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -181.3389434814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4493621587753296, + "rewards_train/margins": 5.38453209400177, + "rewards_train/rejected": -6.8338942527771, + "step": 645 + }, + { + "epoch": 0.18, + "learning_rate": 1.7717764788210844e-06, + "loss": 0.5415, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -115.6749267578125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -167.20022583007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.467492699623108, + "rewards_train/margins": 4.5525301694869995, + "rewards_train/rejected": -6.020022869110107, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -244.2239532470703, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -208.28561401367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.122395515441895, + "rewards_train/margins": -1.893834114074707, + "rewards_train/rejected": -7.2285614013671875, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -207.66468811035156, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -194.4610595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.366468906402588, + "rewards_train/margins": 1.579637050628662, + "rewards_train/rejected": -6.94610595703125, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -41.981788635253906, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -31.593189239501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9981788992881775, + "rewards_train/margins": 0.6048900485038757, + "rewards_train/rejected": -1.6030689477920532, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -14.146389961242676, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -20.199665069580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10213899612426758, + "rewards_train/margins": 0.6053275465965271, + "rewards_train/rejected": -0.7074665427207947, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -39.786659240722656, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -15.025134086608887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0963340774178505, + "rewards_train/margins": 0.4488474801182747, + "rewards_train/rejected": -0.3525134027004242, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -119.91202545166016, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -248.55702209472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9912025928497314, + "rewards_train/margins": 7.464500188827515, + "rewards_train/rejected": -10.455702781677246, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -16.24448013305664, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -17.88459587097168, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4994480311870575, + "rewards_train/margins": -0.11098843812942505, + "rewards_train/rejected": -0.38845959305763245, + "step": 646 + }, + { + "epoch": 0.18, + "logps_train/chosen": -4.319039344787598, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -7.5115275382995605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06940393894910812, + "rewards_train/margins": 0.1911238208413124, + "rewards_train/rejected": -0.26052775979042053, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -207.63204956054688, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -275.25396728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.963204860687256, + "rewards_train/margins": 4.762192249298096, + "rewards_train/rejected": -10.725397109985352, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -135.55934143066406, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -277.61962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.505934238433838, + "rewards_train/margins": 10.756028652191162, + "rewards_train/rejected": -13.261962890625, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -213.2913818359375, + "logps_train/ref_chosen": -200.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -165.1006622314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.329138159751892, + "rewards_train/margins": 2.930928349494934, + "rewards_train/rejected": -4.260066509246826, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -11.664338111877441, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -19.974016189575195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0914338156580925, + "rewards_train/margins": 0.6997178271412849, + "rewards_train/rejected": -0.7911516427993774, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.7673856616020203, + "logps_train/ref_chosen": -1.25, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -4.625308513641357, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.048261433839797974, + "rewards_train/margins": 0.21704228222370148, + "rewards_train/rejected": -0.1687808483839035, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -120.47703552246094, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -120.55809020996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1977035999298096, + "rewards_train/margins": 0.00810551643371582, + "rewards_train/rejected": -3.2058091163635254, + "step": 647 + }, + { + "epoch": 0.18, + "logps_train/chosen": -21.133808135986328, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -26.174823760986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22588081657886505, + "rewards_train/margins": 0.8041016310453415, + "rewards_train/rejected": -1.0299824476242065, + "step": 647 + }, + { + "epoch": 0.18, + "learning_rate": 1.7700914897509444e-06, + "loss": 0.4413, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -177.50100708007812, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -169.47300720214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.450100898742676, + "rewards_train/margins": -0.05280017852783203, + "rewards_train/rejected": -4.397300720214844, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -71.15707397460938, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -71.5278549194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1157073974609375, + "rewards_train/margins": 0.037078097462654114, + "rewards_train/rejected": -0.1527854949235916, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.875726699829102, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -10.437217712402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09694766998291016, + "rewards_train/margins": 0.2936491072177887, + "rewards_train/rejected": -0.39059677720069885, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -14.438630104064941, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -17.989896774291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3376130163669586, + "rewards_train/margins": 0.42387667298316956, + "rewards_train/rejected": -0.7614896893501282, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -15.827194213867188, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -9.732542037963867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19521942734718323, + "rewards_train/margins": 0.18740978837013245, + "rewards_train/rejected": -0.3826292157173157, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.32981330156326294, + "logps_train/ref_chosen": -1.265625, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -7.582311630249023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0935811698436737, + "rewards_train/margins": 0.636187344789505, + "rewards_train/rejected": -0.5426061749458313, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -61.28795623779297, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -82.10689544677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5787956118583679, + "rewards_train/margins": 1.606894075870514, + "rewards_train/rejected": -2.185689687728882, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -84.20426940917969, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -108.7986831665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7204269170761108, + "rewards_train/margins": 0.45944154262542725, + "rewards_train/rejected": -2.179868459701538, + "step": 648 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.9220526218414307, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -19.2580509185791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2202947437763214, + "rewards_train/margins": 0.5523498356342316, + "rewards_train/rejected": -0.33205509185791016, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -16.978588104248047, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -52.05787658691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5228587985038757, + "rewards_train/margins": -0.06707113981246948, + "rewards_train/rejected": -0.45578765869140625, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -40.89604949951172, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -26.025840759277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.702104926109314, + "rewards_train/margins": -0.24952077865600586, + "rewards_train/rejected": -1.452584147453308, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.933438301086426, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -11.58431625366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02540617063641548, + "rewards_train/margins": 0.4588377960026264, + "rewards_train/rejected": -0.43343162536621094, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -99.16336822509766, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -152.83018493652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9663368463516235, + "rewards_train/margins": 1.2166815996170044, + "rewards_train/rejected": -2.183018445968628, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -74.67855834960938, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -105.21316528320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21785584092140198, + "rewards_train/margins": -0.2965393140912056, + "rewards_train/rejected": 0.07868347316980362, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -3.6555287837982178, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -25.383499145507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0374278798699379, + "rewards_train/margins": 1.594672106206417, + "rewards_train/rejected": -1.632099986076355, + "step": 649 + }, + { + "epoch": 0.18, + "logps_train/chosen": -99.9090347290039, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -131.62673950195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7409034967422485, + "rewards_train/margins": 0.9717704057693481, + "rewards_train/rejected": -2.7126739025115967, + "step": 649 + }, + { + "epoch": 0.18, + "learning_rate": 1.768401110856859e-06, + "loss": 0.5169, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -46.17219161987305, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -107.573486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13278083503246307, + "rewards_train/margins": 0.640129491686821, + "rewards_train/rejected": -0.5073486566543579, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -17.008071899414062, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -16.494789123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3633072078227997, + "rewards_train/margins": 1.012734204530716, + "rewards_train/rejected": -1.3760414123535156, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -136.35421752929688, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -202.3592987060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1354217529296875, + "rewards_train/margins": 6.600508213043213, + "rewards_train/rejected": -7.7359299659729, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -63.812252044677734, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -20.25604248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7187747955322266, + "rewards_train/margins": 1.3818790316581726, + "rewards_train/rejected": -0.663104236125946, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -4.757030010223389, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -10.229612350463867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0897655040025711, + "rewards_train/margins": 0.5831957310438156, + "rewards_train/rejected": -0.6729612350463867, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -4.505670070648193, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -10.776325225830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0068170069716870785, + "rewards_train/margins": 0.5801905156113207, + "rewards_train/rejected": -0.5870075225830078, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -23.904441833496094, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -53.14521026611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17794418334960938, + "rewards_train/margins": 0.18657684326171875, + "rewards_train/rejected": -0.3645210266113281, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -152.40638732910156, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -253.74169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7406387329101562, + "rewards_train/margins": 8.533531188964844, + "rewards_train/rejected": -9.274169921875, + "step": 650 + }, + { + "epoch": 0.18, + "logps_train/chosen": -9.589600563049316, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -18.677227020263672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0035399438347667456, + "rewards_train/margins": -0.16623736009933054, + "rewards_train/rejected": 0.1697773039340973, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.336175918579102, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -17.41280174255371, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16174259781837463, + "rewards_train/margins": -0.0704624205827713, + "rewards_train/rejected": -0.09128017723560333, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -76.18222045898438, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -34.96678924560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3182220458984375, + "rewards_train/margins": 1.003456950187683, + "rewards_train/rejected": -1.3216789960861206, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -98.13450622558594, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -36.705039978027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3134506940841675, + "rewards_train/margins": -1.6179467141628265, + "rewards_train/rejected": 0.30449602007865906, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -41.97540283203125, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -65.31890869140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2475403547286987, + "rewards_train/margins": 0.6593505144119263, + "rewards_train/rejected": -1.906890869140625, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -143.81182861328125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -207.54464721679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08118285983800888, + "rewards_train/margins": 0.17328185588121414, + "rewards_train/rejected": -0.254464715719223, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.795295238494873, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -15.138652801513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11702952533960342, + "rewards_train/margins": 0.31558575481176376, + "rewards_train/rejected": -0.4326152801513672, + "step": 651 + }, + { + "epoch": 0.18, + "logps_train/chosen": -80.64615631103516, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -166.45046997070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7146156430244446, + "rewards_train/margins": 1.5304314494132996, + "rewards_train/rejected": -2.245047092437744, + "step": 651 + }, + { + "epoch": 0.18, + "learning_rate": 1.7667053539696878e-06, + "loss": 0.4899, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -174.62350463867188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -161.46627807617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.462350368499756, + "rewards_train/margins": -0.16572237014770508, + "rewards_train/rejected": -4.296627998352051, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -108.72040557861328, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -106.2459716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.59704065322876, + "rewards_train/margins": -0.4224433898925781, + "rewards_train/rejected": -4.174597263336182, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.3323678970336914, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -9.746750831604004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0176117904484272, + "rewards_train/margins": 0.1820632927119732, + "rewards_train/rejected": -0.1996750831604004, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -19.12826156616211, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -31.019107818603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07532615959644318, + "rewards_train/margins": 1.0515846461057663, + "rewards_train/rejected": -1.1269108057022095, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -123.71270751953125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -202.52444458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.121270775794983, + "rewards_train/margins": 6.3811739683151245, + "rewards_train/rejected": -7.502444744110107, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -3.0156357288360596, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -0.77734375, + "logps_train/rejected": -0.7394694089889526, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024998927488923073, + "rewards_train/margins": 0.02121149329468608, + "rewards_train/rejected": 0.003787434194236994, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -130.3710174560547, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -130.08621215820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3871017694473267, + "rewards_train/margins": -0.02848052978515625, + "rewards_train/rejected": -1.3586212396621704, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -94.82353973388672, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -98.50157165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2176460325717926, + "rewards_train/margins": 1.71780326962471, + "rewards_train/rejected": -1.5001572370529175, + "step": 652 + }, + { + "epoch": 0.18, + "logps_train/chosen": -178.19302368164062, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -243.75656127929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.219302654266357, + "rewards_train/margins": 4.856353282928467, + "rewards_train/rejected": -9.075655937194824, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -75.14326477050781, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -64.48088836669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9143264889717102, + "rewards_train/margins": 0.4337623715400696, + "rewards_train/rejected": -1.3480888605117798, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.2735351026058197, + "logps_train/ref_chosen": -0.70703125, + "logps_train/ref_rejected": -0.70703125, + "logps_train/rejected": -0.2850448191165924, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04334961622953415, + "rewards_train/margins": 0.00115097314119339, + "rewards_train/rejected": 0.04219864308834076, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -140.30465698242188, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -167.651611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1304657459259033, + "rewards_train/margins": 4.534695386886597, + "rewards_train/rejected": -5.6651611328125, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -57.84137725830078, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -48.61689758300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.634137749671936, + "rewards_train/margins": 0.32755202054977417, + "rewards_train/rejected": -0.9616897702217102, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.47596386075019836, + "logps_train/ref_chosen": -0.451171875, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -5.711777687072754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002479198621585965, + "rewards_train/margins": 0.27338608796708286, + "rewards_train/rejected": -0.2758652865886688, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -82.49974060058594, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -105.60408782958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9749740362167358, + "rewards_train/margins": 0.38543474674224854, + "rewards_train/rejected": -2.3604087829589844, + "step": 653 + }, + { + "epoch": 0.18, + "logps_train/chosen": -74.04354095458984, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -74.36383056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29564592242240906, + "rewards_train/margins": 0.03202897310256958, + "rewards_train/rejected": 0.2636169493198395, + "step": 653 + }, + { + "epoch": 0.18, + "learning_rate": 1.7650042309579309e-06, + "loss": 0.4803, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -107.94200134277344, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -127.39350891113281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7942001819610596, + "rewards_train/margins": -0.6548492908477783, + "rewards_train/rejected": -1.1393508911132812, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -176.35653686523438, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -239.57720947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4356536865234375, + "rewards_train/margins": 3.622067451477051, + "rewards_train/rejected": -6.057721138000488, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -10.016767501831055, + "logps_train/ref_chosen": -1.5078125, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -13.493986129760742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8508955240249634, + "rewards_train/margins": 0.26569056510925293, + "rewards_train/rejected": -1.1165860891342163, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -12.322399139404297, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -30.474365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6478649377822876, + "rewards_train/margins": 0.18707162141799927, + "rewards_train/rejected": -0.8349365592002869, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.727483749389648, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -21.27656364440918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0460016243159771, + "rewards_train/margins": -0.1263420172035694, + "rewards_train/rejected": 0.1723436415195465, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -15.643712997436523, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -5.731496810913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23562870919704437, + "rewards_train/margins": 0.4697158932685852, + "rewards_train/rejected": -0.23408718407154083, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -34.408485412597656, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -60.23768997192383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1408485174179077, + "rewards_train/margins": 1.6829205751419067, + "rewards_train/rejected": -2.8237690925598145, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -13.536773681640625, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -103.90657043457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10992737114429474, + "rewards_train/margins": 2.4307296723127365, + "rewards_train/rejected": -2.5406570434570312, + "step": 654 + }, + { + "epoch": 0.18, + "logps_train/chosen": -98.72508239746094, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -162.97970581054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6725082397460938, + "rewards_train/margins": 0.5254623889923096, + "rewards_train/rejected": -3.1979706287384033, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -204.56570434570312, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -279.0675354003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.956570625305176, + "rewards_train/margins": 4.050183296203613, + "rewards_train/rejected": -13.006753921508789, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -6.3160319328308105, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -10.00735855102539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.32535320520401, + "rewards_train/margins": -0.08711734414100647, + "rewards_train/rejected": -0.23823586106300354, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.011007308959961, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -13.675495147705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3542257249355316, + "rewards_train/margins": 0.4195738136768341, + "rewards_train/rejected": -0.7737995386123657, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -93.44184112548828, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -184.20266723632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49418410658836365, + "rewards_train/margins": 4.026082903146744, + "rewards_train/rejected": -4.520267009735107, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.8941729068756104, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -21.491455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01370770949870348, + "rewards_train/margins": 0.20035321731120348, + "rewards_train/rejected": -0.1866455078125, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -64.82267761230469, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -81.63453674316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11773224174976349, + "rewards_train/margins": 3.3311860114336014, + "rewards_train/rejected": -3.213453769683838, + "step": 655 + }, + { + "epoch": 0.18, + "logps_train/chosen": -183.44781494140625, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -183.83016967773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.344781517982483, + "rewards_train/margins": 2.738235354423523, + "rewards_train/rejected": -4.083016872406006, + "step": 655 + }, + { + "epoch": 0.18, + "learning_rate": 1.7632977537276461e-06, + "loss": 0.388, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -68.28207397460938, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -114.65689849853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4032074213027954, + "rewards_train/margins": 2.262482523918152, + "rewards_train/rejected": -3.6656899452209473, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -34.369300842285156, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -19.00878143310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8994300961494446, + "rewards_train/margins": -0.5673019587993622, + "rewards_train/rejected": -0.3321281373500824, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -138.04013061523438, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -185.56320190429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5040130615234375, + "rewards_train/margins": 4.902307033538818, + "rewards_train/rejected": -6.406320095062256, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -13.736039161682129, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -10.96600341796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8923539519309998, + "rewards_train/margins": -0.2238786220550537, + "rewards_train/rejected": -0.668475329875946, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -37.10932922363281, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -67.79499053955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03593292459845543, + "rewards_train/margins": 0.29356614127755165, + "rewards_train/rejected": -0.3294990658760071, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -18.123302459716797, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -22.482025146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6060802340507507, + "rewards_train/margins": 0.4796223044395447, + "rewards_train/rejected": -1.0857025384902954, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -5.788563251495361, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -10.767162322998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20239368081092834, + "rewards_train/margins": 0.24160991236567497, + "rewards_train/rejected": -0.03921623155474663, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -31.752046585083008, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -25.072189331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07479534298181534, + "rewards_train/margins": 0.5695142820477486, + "rewards_train/rejected": -0.4947189390659332, + "step": 656 + }, + { + "epoch": 0.18, + "logps_train/chosen": -142.17491149902344, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -220.19522094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4174911677837372, + "rewards_train/margins": 5.102031022310257, + "rewards_train/rejected": -5.519522190093994, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -118.80545043945312, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -81.39692687988281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4805450439453125, + "rewards_train/margins": -0.31585240364074707, + "rewards_train/rejected": -3.1646926403045654, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -134.98907470703125, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -176.81149291992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.098907470703125, + "rewards_train/margins": 1.3822417259216309, + "rewards_train/rejected": -5.481149196624756, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -42.3264274597168, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -86.71046447753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3673572540283203, + "rewards_train/margins": 1.3384037017822266, + "rewards_train/rejected": -0.9710464477539062, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -7.394299507141113, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -9.520946502685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25505495071411133, + "rewards_train/margins": 0.07828971743583679, + "rewards_train/rejected": -0.3333446681499481, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -214.2284698486328, + "logps_train/ref_chosen": -195.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -169.26995849609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.922847032546997, + "rewards_train/margins": -0.19585120677947998, + "rewards_train/rejected": -1.726995825767517, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -80.08135986328125, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -77.99178314208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.408136010169983, + "rewards_train/margins": 0.6410423517227173, + "rewards_train/rejected": -2.0491783618927, + "step": 657 + }, + { + "epoch": 0.18, + "logps_train/chosen": -56.937252044677734, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -52.87387466430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8187252283096313, + "rewards_train/margins": 0.3936622142791748, + "rewards_train/rejected": -1.2123874425888062, + "step": 657 + }, + { + "epoch": 0.18, + "learning_rate": 1.761585934222365e-06, + "loss": 0.4823, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -175.55865478515625, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -186.89599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.855865478515625, + "rewards_train/margins": 3.93373441696167, + "rewards_train/rejected": -5.789599895477295, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.375480651855469, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -4.464704990386963, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4984855651855469, + "rewards_train/margins": -0.2785775661468506, + "rewards_train/rejected": -0.2199079990386963, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -15.861133575439453, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -56.26841354370117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08888664096593857, + "rewards_train/margins": 2.59072794765234, + "rewards_train/rejected": -2.5018413066864014, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -96.29588317871094, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -32.936344146728516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9295883178710938, + "rewards_train/margins": -0.18595391511917114, + "rewards_train/rejected": -0.7436344027519226, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -7.070113658905029, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -18.77132797241211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21013636887073517, + "rewards_train/margins": 0.5232464522123337, + "rewards_train/rejected": -0.7333828210830688, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -240.74710083007812, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -220.14862060546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.474709987640381, + "rewards_train/margins": -0.3598480224609375, + "rewards_train/rejected": -7.114861965179443, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.8819989562034607, + "logps_train/ref_chosen": -0.8359375, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -3.6600592136383057, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004606145899742842, + "rewards_train/margins": 0.05983727844431996, + "rewards_train/rejected": -0.0644434243440628, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -203.02639770507812, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -245.1594696044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.102639675140381, + "rewards_train/margins": 1.2133073806762695, + "rewards_train/rejected": -5.31594705581665, + "step": 658 + }, + { + "epoch": 0.18, + "logps_train/chosen": -138.4012451171875, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -232.39413452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.740124464035034, + "rewards_train/margins": 6.89928936958313, + "rewards_train/rejected": -9.639413833618164, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -37.03255081176758, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -20.784591674804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3782551288604736, + "rewards_train/margins": -0.674795925617218, + "rewards_train/rejected": -0.7034592032432556, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -10.05807113647461, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -20.21298599243164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5245571136474609, + "rewards_train/margins": 0.2654914855957031, + "rewards_train/rejected": -0.7900485992431641, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -72.05293273925781, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -119.39715576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7552933096885681, + "rewards_train/margins": 3.434422552585602, + "rewards_train/rejected": -4.18971586227417, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -124.45682525634766, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -187.11477661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2956825494766235, + "rewards_train/margins": 4.515795111656189, + "rewards_train/rejected": -5.8114776611328125, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -16.125646591186523, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -2.9276955127716064, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14993534982204437, + "rewards_train/margins": 0.2020799033343792, + "rewards_train/rejected": -0.052144553512334824, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.295547485351562, + "logps_train/ref_chosen": -0.1435546875, + "logps_train/ref_rejected": -0.1435546875, + "logps_train/rejected": -8.673160552978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8151993155479431, + "rewards_train/margins": 0.03776127099990845, + "rewards_train/rejected": -0.8529605865478516, + "step": 659 + }, + { + "epoch": 0.18, + "logps_train/chosen": -224.77191162109375, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -262.583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.277191162109375, + "rewards_train/margins": 1.1812076568603516, + "rewards_train/rejected": -9.458398818969727, + "step": 659 + }, + { + "epoch": 0.18, + "learning_rate": 1.7598687844230086e-06, + "loss": 0.4525, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -166.74716186523438, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -190.49664306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.524716377258301, + "rewards_train/margins": 0.22494792938232422, + "rewards_train/rejected": -6.749664306640625, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.8659374713897705, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -5.054544925689697, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3227812647819519, + "rewards_train/margins": 0.3813607580959797, + "rewards_train/rejected": -0.058579493314027786, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -245.04934692382812, + "logps_train/ref_chosen": -190.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -238.124267578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.504934787750244, + "rewards_train/margins": -1.3925080299377441, + "rewards_train/rejected": -4.1124267578125, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -104.29649353027344, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -217.61129760742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7296493649482727, + "rewards_train/margins": 6.33148068189621, + "rewards_train/rejected": -7.061130046844482, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -43.882530212402344, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -64.30632019042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6882530450820923, + "rewards_train/margins": 0.11737895011901855, + "rewards_train/rejected": -1.8056319952011108, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -144.56668090820312, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -243.5800323486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45666810870170593, + "rewards_train/margins": 4.301335126161575, + "rewards_train/rejected": -4.758003234863281, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -8.401612281799316, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -99.88652038574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3870362341403961, + "rewards_train/margins": 0.8016158044338226, + "rewards_train/rejected": -1.1886520385742188, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -102.17157745361328, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -50.03883361816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9671577215194702, + "rewards_train/margins": -0.12577438354492188, + "rewards_train/rejected": -1.8413833379745483, + "step": 660 + }, + { + "epoch": 0.18, + "logps_train/chosen": -79.54298400878906, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -82.99803161621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9042984247207642, + "rewards_train/margins": 0.9455047845840454, + "rewards_train/rejected": -1.8498032093048096, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -199.1876678466797, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -189.38833618164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.918766975402832, + "rewards_train/margins": -0.5299334526062012, + "rewards_train/rejected": -7.388833522796631, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -41.43550491333008, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -13.308123588562012, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4935505390167236, + "rewards_train/margins": -0.5158631801605225, + "rewards_train/rejected": -0.9776873588562012, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -1.7168564796447754, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -23.617406845092773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1111268550157547, + "rewards_train/margins": 0.560367539525032, + "rewards_train/rejected": -0.44924068450927734, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -13.598340034484863, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -102.23753356933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5598340034484863, + "rewards_train/margins": 1.5639193058013916, + "rewards_train/rejected": -2.123753309249878, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -0.42102518677711487, + "logps_train/ref_chosen": -0.173828125, + "logps_train/ref_rejected": -0.173828125, + "logps_train/rejected": -0.3795410990715027, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.024719705805182457, + "rewards_train/margins": -0.004148408770561218, + "rewards_train/rejected": -0.02057129703462124, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -2.89565110206604, + "logps_train/ref_chosen": -1.890625, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -35.741485595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.100502610206604, + "rewards_train/margins": 0.9236459732055664, + "rewards_train/rejected": -1.0241485834121704, + "step": 661 + }, + { + "epoch": 0.18, + "logps_train/chosen": -163.67330932617188, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -160.68238830566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.017331123352051, + "rewards_train/margins": -0.19909238815307617, + "rewards_train/rejected": -4.818238735198975, + "step": 661 + }, + { + "epoch": 0.19, + "learning_rate": 1.758146316347805e-06, + "loss": 0.5792, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -234.66709899902344, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -251.36459350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.366710186004639, + "rewards_train/margins": 3.269749164581299, + "rewards_train/rejected": -8.636459350585938, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -1.3646353483200073, + "logps_train/ref_chosen": -1.4765625, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -16.57935333251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011192715726792812, + "rewards_train/margins": 0.3066280549392104, + "rewards_train/rejected": -0.2954353392124176, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -31.13562774658203, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -39.13970184326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7385627627372742, + "rewards_train/margins": -0.19959259033203125, + "rewards_train/rejected": -0.5389701724052429, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -124.42164611816406, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -179.40020751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.892164707183838, + "rewards_train/margins": 2.8978562355041504, + "rewards_train/rejected": -5.790020942687988, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -29.901290893554688, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -24.1048526763916, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4401291012763977, + "rewards_train/margins": 1.3141061663627625, + "rewards_train/rejected": -1.7542352676391602, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -119.807861328125, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -176.93362426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1307861804962158, + "rewards_train/margins": 3.0625765323638916, + "rewards_train/rejected": -4.193362712860107, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.370513916015625, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -9.851898193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04419860988855362, + "rewards_train/margins": 0.2637634351849556, + "rewards_train/rejected": -0.21956482529640198, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -116.96504211425781, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -117.13827514648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4965041875839233, + "rewards_train/margins": 0.01732337474822998, + "rewards_train/rejected": -1.5138275623321533, + "step": 662 + }, + { + "epoch": 0.19, + "logps_train/chosen": -125.91596221923828, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -134.71624755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.641596257686615, + "rewards_train/margins": 2.8300285935401917, + "rewards_train/rejected": -3.4716248512268066, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -116.68415069580078, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -113.33787536621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.518415093421936, + "rewards_train/margins": -0.5346275568008423, + "rewards_train/rejected": 0.01621246337890625, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -127.91191101074219, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -248.84786987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7411911487579346, + "rewards_train/margins": 5.743595838546753, + "rewards_train/rejected": -8.484786987304688, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -20.921310424804688, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -11.00285816192627, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4983810484409332, + "rewards_train/margins": -0.02309522032737732, + "rewards_train/rejected": -0.4752858281135559, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -221.39830017089844, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -175.07888793945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.239830017089844, + "rewards_train/margins": -1.7319412231445312, + "rewards_train/rejected": -5.5078887939453125, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -33.539207458496094, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -35.19367218017578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5039207339286804, + "rewards_train/margins": -0.18455350399017334, + "rewards_train/rejected": -0.3193672299385071, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -190.76971435546875, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -180.013916015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7769715785980225, + "rewards_train/margins": -0.375579833984375, + "rewards_train/rejected": -3.4013917446136475, + "step": 663 + }, + { + "epoch": 0.19, + "logps_train/chosen": -23.59127426147461, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -33.60831832885742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028372574597597122, + "rewards_train/margins": 0.5892044194042683, + "rewards_train/rejected": -0.5608318448066711, + "step": 663 + }, + { + "epoch": 0.19, + "learning_rate": 1.7564185420522051e-06, + "loss": 0.5476, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -3.916471242904663, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -1.734375, + "logps_train/rejected": -9.873272895812988, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08960288017988205, + "rewards_train/margins": 0.9034926816821098, + "rewards_train/rejected": -0.8138898015022278, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -94.05357360839844, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -97.67384338378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6053573489189148, + "rewards_train/margins": 0.1620270013809204, + "rewards_train/rejected": -0.7673843502998352, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -74.12968444824219, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -217.90802001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4629684388637543, + "rewards_train/margins": 7.127833753824234, + "rewards_train/rejected": -7.590802192687988, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -157.52308654785156, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -158.52688598632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6523086428642273, + "rewards_train/margins": 0.20037996768951416, + "rewards_train/rejected": -0.8526886105537415, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -81.14006042480469, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -70.38240814208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9140060544013977, + "rewards_train/margins": 1.8242349028587341, + "rewards_train/rejected": -2.738240957260132, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -8.761791229248047, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -34.73827362060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011320876888930798, + "rewards_train/margins": 0.6726482389494777, + "rewards_train/rejected": -0.6613273620605469, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -41.548194885253906, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -33.2170524597168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7298194766044617, + "rewards_train/margins": 0.8293858170509338, + "rewards_train/rejected": -1.5592052936553955, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -37.45586013793945, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -28.730106353759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6455860137939453, + "rewards_train/margins": -0.2225753664970398, + "rewards_train/rejected": -0.4230106472969055, + "step": 664 + }, + { + "epoch": 0.19, + "logps_train/chosen": -105.70616149902344, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -141.35202026367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6206161975860596, + "rewards_train/margins": 0.9645857810974121, + "rewards_train/rejected": -3.5852019786834717, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.768795490264893, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -32.779579162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3356204628944397, + "rewards_train/margins": 1.5260784029960632, + "rewards_train/rejected": -1.1904579401016235, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -254.02308654785156, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -247.98216247558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.202308654785156, + "rewards_train/margins": 2.5959081649780273, + "rewards_train/rejected": -10.798216819763184, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -85.16342163085938, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -136.96263122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2663421630859375, + "rewards_train/margins": 5.4299211502075195, + "rewards_train/rejected": -5.696263313293457, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -172.3719482421875, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -188.3909912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.537194728851318, + "rewards_train/margins": 0.5019044876098633, + "rewards_train/rejected": -5.039099216461182, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -8.591850280761719, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -18.505218505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24981002509593964, + "rewards_train/margins": 0.8225868493318558, + "rewards_train/rejected": -1.0723968744277954, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -211.5426025390625, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -176.77072143554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.254260301589966, + "rewards_train/margins": 1.3228118419647217, + "rewards_train/rejected": -3.5770721435546875, + "step": 665 + }, + { + "epoch": 0.19, + "logps_train/chosen": -234.92185974121094, + "logps_train/ref_chosen": -224.0, + "logps_train/ref_rejected": -268.0, + "logps_train/rejected": -277.26568603515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0921859741210938, + "rewards_train/margins": -0.16561734676361084, + "rewards_train/rejected": -0.9265686273574829, + "step": 665 + }, + { + "epoch": 0.19, + "learning_rate": 1.7546854736287964e-06, + "loss": 0.3586, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -14.254066467285156, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -1.671875, + "logps_train/rejected": -5.664511680603027, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4754066467285156, + "rewards_train/margins": -0.07614296674728394, + "rewards_train/rejected": -0.3992636799812317, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -176.05126953125, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -155.7747802734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.455127239227295, + "rewards_train/margins": -0.27764892578125, + "rewards_train/rejected": -5.177478313446045, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -50.21240997314453, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -25.265195846557617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.696241021156311, + "rewards_train/margins": -0.1197214126586914, + "rewards_train/rejected": -0.5765196084976196, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -17.009502410888672, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -38.94643783569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5822002291679382, + "rewards_train/margins": 0.6749436259269714, + "rewards_train/rejected": -1.2571438550949097, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -141.84144592285156, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -209.54299926757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.6341447830200195, + "rewards_train/margins": 2.020155429840088, + "rewards_train/rejected": -6.654300212860107, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -156.11990356445312, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -196.08297729492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.81199049949646, + "rewards_train/margins": 3.746307134628296, + "rewards_train/rejected": -7.558297634124756, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -3.665846109390259, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -11.532096862792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07752211391925812, + "rewards_train/margins": 0.5756875723600388, + "rewards_train/rejected": -0.6532096862792969, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -87.91009521484375, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -118.50047302246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.941009521484375, + "rewards_train/margins": 1.3590378761291504, + "rewards_train/rejected": -2.3000473976135254, + "step": 666 + }, + { + "epoch": 0.19, + "logps_train/chosen": -24.013721466064453, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -25.27495574951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9138721823692322, + "rewards_train/margins": 0.6198734641075134, + "rewards_train/rejected": -1.5337456464767456, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -152.27139282226562, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -164.32833862304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1271393299102783, + "rewards_train/margins": 0.40569448471069336, + "rewards_train/rejected": -2.5328338146209717, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -5.9178338050842285, + "logps_train/ref_chosen": -1.7421875, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -10.972174644470215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41756463050842285, + "rewards_train/margins": -0.035972148180007935, + "rewards_train/rejected": -0.3815924823284149, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -39.89035415649414, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -86.85812377929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33903542160987854, + "rewards_train/margins": 2.271777004003525, + "rewards_train/rejected": -2.6108124256134033, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -18.806434631347656, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -5.288465976715088, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18185654282569885, + "rewards_train/margins": 0.2075781412422657, + "rewards_train/rejected": -0.02572159841656685, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -2.6583938598632812, + "logps_train/ref_chosen": -1.765625, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -8.182902336120605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08927688747644424, + "rewards_train/margins": 0.1821383461356163, + "rewards_train/rejected": -0.27141523361206055, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -100.04273986816406, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -99.55973815917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.104274034500122, + "rewards_train/margins": 0.0016999244689941406, + "rewards_train/rejected": -2.105973958969116, + "step": 667 + }, + { + "epoch": 0.19, + "logps_train/chosen": -77.87162780761719, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -150.98837280273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4121627807617188, + "rewards_train/margins": 3.086674690246582, + "rewards_train/rejected": -4.498837471008301, + "step": 667 + }, + { + "epoch": 0.19, + "learning_rate": 1.752947123207221e-06, + "loss": 0.4531, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.027712821960449, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -12.050232887268066, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1590212881565094, + "rewards_train/margins": 0.5835020244121552, + "rewards_train/rejected": -0.7425233125686646, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.168190956115723, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -17.634624481201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11130590736865997, + "rewards_train/margins": 0.47476835548877716, + "rewards_train/rejected": -0.3634624481201172, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -62.30267333984375, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -98.00186157226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14473266899585724, + "rewards_train/margins": 1.5449188500642776, + "rewards_train/rejected": -1.4001861810684204, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -13.260493278503418, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -61.36943435668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2739506661891937, + "rewards_train/margins": 1.3358940780162811, + "rewards_train/rejected": -1.0619434118270874, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -1.067834734916687, + "logps_train/ref_chosen": -0.8984375, + "logps_train/ref_rejected": -1.921875, + "logps_train/rejected": -2.3961145877838135, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01693972386419773, + "rewards_train/margins": 0.030484234914183617, + "rewards_train/rejected": -0.04742395877838135, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -124.55416870117188, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -166.7618865966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7054168581962585, + "rewards_train/margins": 1.7707718014717102, + "rewards_train/rejected": -2.4761886596679688, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -85.88764953613281, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -65.3445816040039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6887649893760681, + "rewards_train/margins": -0.9543068408966064, + "rewards_train/rejected": 0.26554185152053833, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -103.38790130615234, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -112.41754913330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8887901306152344, + "rewards_train/margins": 0.3529648780822754, + "rewards_train/rejected": -2.2417550086975098, + "step": 668 + }, + { + "epoch": 0.19, + "logps_train/chosen": -12.890013694763184, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -15.345709800720215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5796263813972473, + "rewards_train/margins": 0.3705695867538452, + "rewards_train/rejected": -0.9501959681510925, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -109.25424194335938, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -116.31233215332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12542419135570526, + "rewards_train/margins": 0.05580902099609375, + "rewards_train/rejected": -0.181233212351799, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -93.8818359375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -108.919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.238183617591858, + "rewards_train/margins": 0.7038085460662842, + "rewards_train/rejected": -1.941992163658142, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -13.35640811920166, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -33.65991973876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38564082980155945, + "rewards_train/margins": 0.10535115003585815, + "rewards_train/rejected": -0.4909919798374176, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -99.21965026855469, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -160.75439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3280349671840668, + "rewards_train/margins": 0.9034744203090668, + "rewards_train/rejected": -0.575439453125, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -99.046875, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -17.458053588867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7046875357627869, + "rewards_train/margins": -0.1151321530342102, + "rewards_train/rejected": -0.5895553827285767, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -163.509765625, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -169.88682556152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.300976753234863, + "rewards_train/margins": -0.2122941017150879, + "rewards_train/rejected": -4.088682651519775, + "step": 669 + }, + { + "epoch": 0.19, + "logps_train/chosen": -48.03642272949219, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -27.83172035217285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3786422908306122, + "rewards_train/margins": -0.03297024965286255, + "rewards_train/rejected": -0.34567204117774963, + "step": 669 + }, + { + "epoch": 0.19, + "learning_rate": 1.7512035029540884e-06, + "loss": 0.5525, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -95.03804016113281, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -104.7800521850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7038040161132812, + "rewards_train/margins": 1.074201226234436, + "rewards_train/rejected": -1.7780052423477173, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -47.50535202026367, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -51.04826736450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0505352020263672, + "rewards_train/margins": 1.2167916297912598, + "rewards_train/rejected": -2.267326831817627, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -157.69052124023438, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -149.9307403564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.969052314758301, + "rewards_train/margins": -0.6759781837463379, + "rewards_train/rejected": -5.293074131011963, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -1.9760541915893555, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -1.109375, + "logps_train/rejected": -1.5756206512451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014894581399857998, + "rewards_train/margins": 0.061519148759543896, + "rewards_train/rejected": -0.0466245673596859, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -141.15744018554688, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -226.3603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.115744113922119, + "rewards_train/margins": 1.9202909469604492, + "rewards_train/rejected": -5.036035060882568, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.448739051818848, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -15.48756217956543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4823739230632782, + "rewards_train/margins": 0.29450729489326477, + "rewards_train/rejected": -0.776881217956543, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -7.196741104125977, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -8.735796928405762, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16782589256763458, + "rewards_train/margins": 0.4695305973291397, + "rewards_train/rejected": -0.3017047047615051, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -21.14236068725586, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -24.625778198242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7954860925674438, + "rewards_train/margins": 0.3920917510986328, + "rewards_train/rejected": -1.1875778436660767, + "step": 670 + }, + { + "epoch": 0.19, + "logps_train/chosen": -19.323497772216797, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -69.03467559814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5948497653007507, + "rewards_train/margins": 0.7586178183555603, + "rewards_train/rejected": -1.353467583656311, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -32.270050048828125, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -48.61396789550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4770050048828125, + "rewards_train/margins": 0.15939176082611084, + "rewards_train/rejected": -1.6363967657089233, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -127.38914489746094, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -174.64752197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.588914632797241, + "rewards_train/margins": 1.2758376598358154, + "rewards_train/rejected": -3.8647522926330566, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -106.63150787353516, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -146.39898681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0631507635116577, + "rewards_train/margins": 0.9267479181289673, + "rewards_train/rejected": -1.989898681640625, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -14.398404121398926, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -35.59193801879883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27109041810035706, + "rewards_train/margins": 0.9506034553050995, + "rewards_train/rejected": -1.2216938734054565, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -1.8426148891448975, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -2.6194372177124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.042855240404605865, + "rewards_train/margins": 0.014400981366634369, + "rewards_train/rejected": -0.057256221771240234, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -38.43605041503906, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -86.63370513916016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04360504075884819, + "rewards_train/margins": -0.08023452758789062, + "rewards_train/rejected": 0.036629486829042435, + "step": 671 + }, + { + "epoch": 0.19, + "logps_train/chosen": -12.74402141571045, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -2.810235023498535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1431521475315094, + "rewards_train/margins": -0.14494114520493895, + "rewards_train/rejected": 0.0017889976734295487, + "step": 671 + }, + { + "epoch": 0.19, + "learning_rate": 1.7494546250728916e-06, + "loss": 0.5056, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -117.27192687988281, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -217.91578674316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5771927237510681, + "rewards_train/margins": 3.9143858551979065, + "rewards_train/rejected": -4.491578578948975, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -86.82733154296875, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -87.839111328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.432733178138733, + "rewards_train/margins": -0.14882206916809082, + "rewards_train/rejected": -1.283911108970642, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -49.66050338745117, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -87.01566314697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1660503149032593, + "rewards_train/margins": 0.9355159997940063, + "rewards_train/rejected": -2.1015663146972656, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -108.00298309326172, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -153.92254638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4002983272075653, + "rewards_train/margins": 1.6919563114643097, + "rewards_train/rejected": -2.092254638671875, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -26.764720916748047, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -19.679027557373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3514721095561981, + "rewards_train/margins": 0.32893064618110657, + "rewards_train/rejected": -0.6804027557373047, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -4.487688064575195, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -5.7816162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05501880869269371, + "rewards_train/margins": 0.0012678131461143494, + "rewards_train/rejected": -0.05628662183880806, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -126.16181182861328, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -154.65380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3661812543869019, + "rewards_train/margins": 1.8991996049880981, + "rewards_train/rejected": -3.265380859375, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -90.63775634765625, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -122.32805633544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.563775658607483, + "rewards_train/margins": 1.8190299272537231, + "rewards_train/rejected": -3.382805585861206, + "step": 672 + }, + { + "epoch": 0.19, + "logps_train/chosen": -4.188851833343506, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -1.1484375, + "logps_train/rejected": -1.6662302017211914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20482268929481506, + "rewards_train/margins": -0.15304341912269592, + "rewards_train/rejected": -0.05177927017211914, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -36.82955551147461, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -29.401756286621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.632955551147461, + "rewards_train/margins": -0.7177799344062805, + "rewards_train/rejected": -0.9151756167411804, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -132.5766143798828, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -160.6631317138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.357661485671997, + "rewards_train/margins": 4.458651781082153, + "rewards_train/rejected": -5.81631326675415, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -22.57965850830078, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -33.231109619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057965852320194244, + "rewards_train/margins": 1.602645181119442, + "rewards_train/rejected": -1.6606110334396362, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -57.66034698486328, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -69.2571029663086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.941034734249115, + "rewards_train/margins": 1.534675657749176, + "rewards_train/rejected": -2.475710391998291, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.020683288574219, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -15.019119262695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24269333481788635, + "rewards_train/margins": -0.3282814100384712, + "rewards_train/rejected": 0.08558807522058487, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -137.60104370117188, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -106.61507415771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.660104513168335, + "rewards_train/margins": 0.9014029502868652, + "rewards_train/rejected": -3.5615074634552, + "step": 673 + }, + { + "epoch": 0.19, + "logps_train/chosen": -18.79454231262207, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -5.468633651733398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41695424914360046, + "rewards_train/margins": -0.2794658839702606, + "rewards_train/rejected": -0.13748836517333984, + "step": 673 + }, + { + "epoch": 0.19, + "learning_rate": 1.747700501803922e-06, + "loss": 0.4467, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -38.759124755859375, + "logps_train/ref_chosen": -29.125, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -37.28526306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9634124636650085, + "rewards_train/margins": 0.01511383056640625, + "rewards_train/rejected": -0.9785262942314148, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -44.93244552612305, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -16.88510513305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03175544738769531, + "rewards_train/margins": 0.8015159964561462, + "rewards_train/rejected": -0.7697605490684509, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -4.329645156860352, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -9.009256362915039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20796452462673187, + "rewards_train/margins": 0.07733611762523651, + "rewards_train/rejected": -0.2853006422519684, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -5.554853439331055, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -3.1875, + "logps_train/rejected": -11.753402709960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02423534356057644, + "rewards_train/margins": 0.8323549274355173, + "rewards_train/rejected": -0.8565902709960938, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -72.20649719238281, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -72.76339721679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.920649766921997, + "rewards_train/margins": 0.6306900978088379, + "rewards_train/rejected": -2.551339864730835, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -181.31787109375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -207.0550537109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.631787300109863, + "rewards_train/margins": -0.22628164291381836, + "rewards_train/rejected": -4.405505657196045, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.160505294799805, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -1.4140625, + "logps_train/rejected": -4.252464294433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7254255414009094, + "rewards_train/margins": -0.44158536195755005, + "rewards_train/rejected": -0.2838401794433594, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -238.25160217285156, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -206.8641357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.4251604080200195, + "rewards_train/margins": 1.9112529754638672, + "rewards_train/rejected": -8.336413383483887, + "step": 674 + }, + { + "epoch": 0.19, + "logps_train/chosen": -66.944580078125, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -106.99005889892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25554201006889343, + "rewards_train/margins": 3.1045479476451874, + "rewards_train/rejected": -2.849005937576294, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -9.959388732910156, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -13.700420379638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09593887627124786, + "rewards_train/margins": 0.6178531497716904, + "rewards_train/rejected": -0.7137920260429382, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -101.2388687133789, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -170.5692901611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2238868474960327, + "rewards_train/margins": 1.3330422639846802, + "rewards_train/rejected": -2.556929111480713, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.523658752441406, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -20.566064834594727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23986588418483734, + "rewards_train/margins": 0.2042406052350998, + "rewards_train/rejected": -0.44410648941993713, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -3.695795774459839, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -0.5078125, + "logps_train/rejected": -0.5727756023406982, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2141108363866806, + "rewards_train/margins": -0.20761452615261078, + "rewards_train/rejected": -0.006496310234069824, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -172.24844360351562, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -155.05352783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.274844646453857, + "rewards_train/margins": 0.18050813674926758, + "rewards_train/rejected": -5.455352783203125, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -37.845542907714844, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -15.370899200439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015445709228515625, + "rewards_train/margins": 0.2900356352329254, + "rewards_train/rejected": -0.2745899260044098, + "step": 675 + }, + { + "epoch": 0.19, + "logps_train/chosen": -114.86665344238281, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -115.80418395996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.336665630340576, + "rewards_train/margins": 0.09375286102294922, + "rewards_train/rejected": -4.430418491363525, + "step": 675 + }, + { + "epoch": 0.19, + "learning_rate": 1.7459411454241822e-06, + "loss": 0.5194, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -27.435794830322266, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -9.045988082885742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41857948899269104, + "rewards_train/margins": 0.08914431929588318, + "rewards_train/rejected": -0.5077238082885742, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -167.30587768554688, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -220.6112060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7305877208709717, + "rewards_train/margins": 3.5305330753326416, + "rewards_train/rejected": -7.261120796203613, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -120.14952087402344, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -230.6644287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8649520874023438, + "rewards_train/margins": 5.301490783691406, + "rewards_train/rejected": -6.16644287109375, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -28.009008407592773, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -26.949718475341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4259008169174194, + "rewards_train/margins": 0.10032105445861816, + "rewards_train/rejected": -1.5262218713760376, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -79.00296783447266, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -147.3577880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8502967953681946, + "rewards_train/margins": 2.3854820132255554, + "rewards_train/rejected": -3.23577880859375, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -96.97806549072266, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -55.35004806518555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.002193450927734375, + "rewards_train/margins": -0.3378017544746399, + "rewards_train/rejected": 0.33999520540237427, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -71.11849975585938, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -97.70537567138672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3868499994277954, + "rewards_train/margins": -0.3663123846054077, + "rewards_train/rejected": -1.0205376148223877, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -49.78160858154297, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -65.06829071044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7031608819961548, + "rewards_train/margins": 0.228668212890625, + "rewards_train/rejected": -0.9318290948867798, + "step": 676 + }, + { + "epoch": 0.19, + "logps_train/chosen": -20.17424964904785, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -2.5, + "logps_train/rejected": -8.527464866638184, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4736749827861786, + "rewards_train/margins": 0.12907150387763977, + "rewards_train/rejected": -0.6027464866638184, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -124.12001037597656, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -163.25796508789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3620011806488037, + "rewards_train/margins": 3.163795232772827, + "rewards_train/rejected": -5.525796413421631, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -11.113980293273926, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -29.759836196899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07389803230762482, + "rewards_train/margins": 1.3770856112241745, + "rewards_train/rejected": -1.4509836435317993, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -9.79430103302002, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -18.572092056274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0018198967445641756, + "rewards_train/margins": 0.40902911429293454, + "rewards_train/rejected": -0.40720921754837036, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -147.97882080078125, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -165.88665771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.597882091999054, + "rewards_train/margins": 0.8907837271690369, + "rewards_train/rejected": -1.4886658191680908, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -96.02760314941406, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -157.6876220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.002760410308838, + "rewards_train/margins": 3.7160019874572754, + "rewards_train/rejected": -6.718762397766113, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -23.99462890625, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -11.099937438964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.074462890625, + "rewards_train/margins": -0.058219145983457565, + "rewards_train/rejected": -0.016243744641542435, + "step": 677 + }, + { + "epoch": 0.19, + "logps_train/chosen": -4.759437084197998, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -19.238033294677734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0009437084081582725, + "rewards_train/margins": -0.5396404147031717, + "rewards_train/rejected": 0.5386967062950134, + "step": 677 + }, + { + "epoch": 0.19, + "learning_rate": 1.7441765682473015e-06, + "loss": 0.4542, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -127.5535888671875, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -131.70489501953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.255359172821045, + "rewards_train/margins": -0.13486957550048828, + "rewards_train/rejected": -5.120489597320557, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -159.70706176757812, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -230.1080322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2707061767578125, + "rewards_train/margins": 3.540097236633301, + "rewards_train/rejected": -4.810803413391113, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -123.82501983642578, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -134.71954345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2825020551681519, + "rewards_train/margins": 0.08945226669311523, + "rewards_train/rejected": -1.371954321861267, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -17.040224075317383, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -24.878528594970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41027241945266724, + "rewards_train/margins": 0.39008045196533203, + "rewards_train/rejected": -0.8003528714179993, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -103.52017974853516, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -114.02923583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0520179271698, + "rewards_train/margins": 0.25090575218200684, + "rewards_train/rejected": -2.3029236793518066, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -26.590286254882812, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -67.95294952392578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5090286135673523, + "rewards_train/margins": -0.41373366117477417, + "rewards_train/rejected": -0.09529495239257812, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -91.3939437866211, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -126.15914916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4393943548202515, + "rewards_train/margins": 0.9765206575393677, + "rewards_train/rejected": -2.415915012359619, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -119.70946502685547, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -169.46536254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.670946478843689, + "rewards_train/margins": 1.2755898237228394, + "rewards_train/rejected": -2.9465363025665283, + "step": 678 + }, + { + "epoch": 0.19, + "logps_train/chosen": -11.077865600585938, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -26.96091079711914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3577865660190582, + "rewards_train/margins": -0.01169547438621521, + "rewards_train/rejected": -0.346091091632843, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -89.60614013671875, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -159.43948364257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7606140375137329, + "rewards_train/margins": 3.8333343267440796, + "rewards_train/rejected": -4.5939483642578125, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -209.5696258544922, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -257.586181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.056962490081787, + "rewards_train/margins": 4.5016560554504395, + "rewards_train/rejected": -10.558618545532227, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -102.51359558105469, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -70.1771469116211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5513595938682556, + "rewards_train/margins": 1.1163551211357117, + "rewards_train/rejected": -1.6677147150039673, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -38.337310791015625, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -53.32592010498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7337310910224915, + "rewards_train/margins": 1.2988609671592712, + "rewards_train/rejected": -2.0325920581817627, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -53.359642028808594, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -10.336052894592285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7609642148017883, + "rewards_train/margins": -0.10704642534255981, + "rewards_train/rejected": -0.6539177894592285, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -63.4287109375, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -41.73561096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21787109971046448, + "rewards_train/margins": 2.4806900918483734, + "rewards_train/rejected": -2.698561191558838, + "step": 679 + }, + { + "epoch": 0.19, + "logps_train/chosen": -106.8134536743164, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -156.78103637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3186546266078949, + "rewards_train/margins": 2.396758407354355, + "rewards_train/rejected": -2.07810378074646, + "step": 679 + }, + { + "epoch": 0.19, + "learning_rate": 1.7424067826234498e-06, + "loss": 0.3869, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -11.057059288024902, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -20.079965591430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41195592284202576, + "rewards_train/margins": 0.03354063630104065, + "rewards_train/rejected": -0.4454965591430664, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -105.20667266845703, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -157.4610595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.72066730260849, + "rewards_train/margins": 1.6254387497901917, + "rewards_train/rejected": -2.3461060523986816, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -75.32150268554688, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -72.92095947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3071502447128296, + "rewards_train/margins": 0.03494572639465332, + "rewards_train/rejected": -1.342095971107483, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -108.99066162109375, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -139.3913116455078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.449066162109375, + "rewards_train/margins": -0.2599349021911621, + "rewards_train/rejected": -2.189131259918213, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -75.67107391357422, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -83.87005615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.032892610877752304, + "rewards_train/margins": 0.31989824399352074, + "rewards_train/rejected": -0.28700563311576843, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -96.10115051269531, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -108.57610321044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.060115098953247, + "rewards_train/margins": 0.747495174407959, + "rewards_train/rejected": -2.807610273361206, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -103.15435791015625, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -141.9849395751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.065435767173767, + "rewards_train/margins": 0.4330582618713379, + "rewards_train/rejected": -1.498494029045105, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -113.57039642333984, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -146.35653686523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7570396661758423, + "rewards_train/margins": 1.9286140203475952, + "rewards_train/rejected": -3.6856536865234375, + "step": 680 + }, + { + "epoch": 0.19, + "logps_train/chosen": -85.07460021972656, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -118.18603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2574599981307983, + "rewards_train/margins": 2.5111435651779175, + "rewards_train/rejected": -3.768603563308716, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -12.570303916931152, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -12.595841407775879, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9273428916931152, + "rewards_train/margins": -0.008383750915527344, + "rewards_train/rejected": -0.9189591407775879, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -18.85763931274414, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -29.078989028930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21076393127441406, + "rewards_train/margins": 0.6721349954605103, + "rewards_train/rejected": -0.8828989267349243, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -49.33750915527344, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -86.59273529052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6837509870529175, + "rewards_train/margins": 1.825522541999817, + "rewards_train/rejected": -3.5092735290527344, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -170.94601440429688, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -213.87811279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9946014881134033, + "rewards_train/margins": 2.39320969581604, + "rewards_train/rejected": -5.387811183929443, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -52.28959274291992, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -96.3236312866211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07895927876234055, + "rewards_train/margins": 0.8534038737416267, + "rewards_train/rejected": -0.9323631525039673, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -158.73094177246094, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -183.85189819335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.523094177246094, + "rewards_train/margins": 1.612095832824707, + "rewards_train/rejected": -8.1351900100708, + "step": 681 + }, + { + "epoch": 0.19, + "logps_train/chosen": -49.60090637207031, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -84.09513854980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03990936279296875, + "rewards_train/margins": -0.25057679414749146, + "rewards_train/rejected": 0.2904861569404602, + "step": 681 + }, + { + "epoch": 0.19, + "learning_rate": 1.7406318009392497e-06, + "loss": 0.42, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -111.49362182617188, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -187.84402465820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8993622064590454, + "rewards_train/margins": 6.285040259361267, + "rewards_train/rejected": -8.184402465820312, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -156.75888061523438, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -196.9340057373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.525887966156006, + "rewards_train/margins": 1.3675127029418945, + "rewards_train/rejected": -6.8934006690979, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -7.2953033447265625, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -5.99520206451416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14203034341335297, + "rewards_train/margins": 0.12780235707759857, + "rewards_train/rejected": -0.26983270049095154, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.006378173828125, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -30.21520233154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.394387811422348, + "rewards_train/margins": 1.1896324455738068, + "rewards_train/rejected": -1.5840202569961548, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -26.89431381225586, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -78.95108795166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6519314050674438, + "rewards_train/margins": 3.1181775331497192, + "rewards_train/rejected": -3.770108938217163, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -116.49919128417969, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -95.47488403320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7000808715820312, + "rewards_train/margins": 1.4975692629814148, + "rewards_train/rejected": -0.7974883913993835, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -0.11129745841026306, + "logps_train/ref_chosen": -0.240234375, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -43.372440338134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012893691658973694, + "rewards_train/margins": 3.206387773156166, + "rewards_train/rejected": -3.1934940814971924, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -128.59056091308594, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -169.08526611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.359056234359741, + "rewards_train/margins": 0.04947042465209961, + "rewards_train/rejected": -2.408526659011841, + "step": 682 + }, + { + "epoch": 0.19, + "logps_train/chosen": -14.413145065307617, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -34.51203918457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30381450057029724, + "rewards_train/margins": 1.6223894655704498, + "rewards_train/rejected": -1.926203966140747, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -30.46043586730957, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -29.875, + "logps_train/rejected": -50.16105651855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34604358673095703, + "rewards_train/margins": 1.6825621128082275, + "rewards_train/rejected": -2.0286056995391846, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -159.12240600585938, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -199.64697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.3122406005859375, + "rewards_train/margins": 0.6524567604064941, + "rewards_train/rejected": -6.964697360992432, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -79.89886474609375, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -97.99211120605469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4898865222930908, + "rewards_train/margins": -0.7406753897666931, + "rewards_train/rejected": -0.7492111325263977, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -17.038131713867188, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -1.625, + "logps_train/rejected": -17.061193466186523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5413131713867188, + "rewards_train/margins": 0.002306222915649414, + "rewards_train/rejected": -1.5436193943023682, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -28.405864715576172, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -20.435855865478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.503086507320404, + "rewards_train/margins": 0.6279991269111633, + "rewards_train/rejected": -1.1310856342315674, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -24.06247329711914, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -64.43315124511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.693747341632843, + "rewards_train/margins": 0.6745677590370178, + "rewards_train/rejected": -1.3683151006698608, + "step": 683 + }, + { + "epoch": 0.19, + "logps_train/chosen": -19.982500076293945, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -20.87604331970215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23924998939037323, + "rewards_train/margins": 1.783104345202446, + "rewards_train/rejected": -1.5438543558120728, + "step": 683 + }, + { + "epoch": 0.19, + "learning_rate": 1.7388516356176911e-06, + "loss": 0.3538, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -250.40350341796875, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -230.4486083984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.840350151062012, + "rewards_train/margins": -0.345489501953125, + "rewards_train/rejected": -10.494860649108887, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -9.40817642211914, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -17.490318298339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6220676302909851, + "rewards_train/margins": -0.15428578853607178, + "rewards_train/rejected": -0.46778184175491333, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -5.392649173736572, + "logps_train/ref_chosen": -1.265625, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -20.26921844482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41270241141319275, + "rewards_train/margins": 0.714219480752945, + "rewards_train/rejected": -1.1269218921661377, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -105.55116271972656, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -238.31454467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7051162719726562, + "rewards_train/margins": 9.226338386535645, + "rewards_train/rejected": -10.9314546585083, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -90.77748107910156, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -9.492283821105957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07774811238050461, + "rewards_train/margins": 0.021480269730091095, + "rewards_train/rejected": -0.0992283821105957, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -217.11187744140625, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -145.80609130859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.811187744140625, + "rewards_train/margins": -1.8305785655975342, + "rewards_train/rejected": -2.980609178543091, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -92.10664367675781, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -152.69427490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3606643676757812, + "rewards_train/margins": 3.9587631225585938, + "rewards_train/rejected": -5.319427490234375, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -181.10403442382812, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -13.962528228759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0104033946990967, + "rewards_train/margins": -1.5704005658626556, + "rewards_train/rejected": -0.44000282883644104, + "step": 684 + }, + { + "epoch": 0.19, + "logps_train/chosen": -169.5585174560547, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -75.27200317382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.055851697921753, + "rewards_train/margins": -1.2536513805389404, + "rewards_train/rejected": -1.8022003173828125, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -92.94435119628906, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -128.02978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44443511962890625, + "rewards_train/margins": 1.1585434675216675, + "rewards_train/rejected": -1.6029785871505737, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.430882453918457, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -7.981433868408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030588245019316673, + "rewards_train/margins": 0.18943014182150364, + "rewards_train/rejected": -0.2200183868408203, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -122.7094955444336, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -208.77932739257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.420949459075928, + "rewards_train/margins": 4.356983661651611, + "rewards_train/rejected": -8.777933120727539, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -138.1119842529297, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -188.50399780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2111984491348267, + "rewards_train/margins": 0.23920130729675293, + "rewards_train/rejected": -1.4503997564315796, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -25.38302993774414, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -75.22701263427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4133029878139496, + "rewards_train/margins": 2.3343984186649323, + "rewards_train/rejected": -2.747701406478882, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -3.9069535732269287, + "logps_train/ref_chosen": -0.85546875, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -19.970478057861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30514848232269287, + "rewards_train/margins": 0.07939931750297546, + "rewards_train/rejected": -0.38454779982566833, + "step": 685 + }, + { + "epoch": 0.19, + "logps_train/chosen": -88.01543426513672, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -170.82432556152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.651543378829956, + "rewards_train/margins": 3.6308891773223877, + "rewards_train/rejected": -7.282432556152344, + "step": 685 + }, + { + "epoch": 0.19, + "learning_rate": 1.737066299118044e-06, + "loss": 0.641, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -125.20648193359375, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -188.87216186523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.320648193359375, + "rewards_train/margins": 4.4665679931640625, + "rewards_train/rejected": -6.7872161865234375, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -12.40369987487793, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -12.820382118225098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22161999344825745, + "rewards_train/margins": 0.23541823029518127, + "rewards_train/rejected": -0.4570382237434387, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -217.6239471435547, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -217.8726348876953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.562394618988037, + "rewards_train/margins": -0.5751311779022217, + "rewards_train/rejected": -3.9872634410858154, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -67.9377670288086, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -115.12472534179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1937767118215561, + "rewards_train/margins": 0.21869583427906036, + "rewards_train/rejected": -0.41247254610061646, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -54.374855041503906, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -20.431472778320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5624855756759644, + "rewards_train/margins": -1.1193383038043976, + "rewards_train/rejected": -0.4431472718715668, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -19.038394927978516, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -15.934813499450684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6475895047187805, + "rewards_train/margins": 0.3427668809890747, + "rewards_train/rejected": -0.9903563857078552, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -11.126568794250488, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -17.47412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09984312206506729, + "rewards_train/margins": 0.5597552433609962, + "rewards_train/rejected": -0.45991212129592896, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -50.742034912109375, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -22.165721893310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1992034912109375, + "rewards_train/margins": -0.995131254196167, + "rewards_train/rejected": -1.2040722370147705, + "step": 686 + }, + { + "epoch": 0.19, + "logps_train/chosen": -17.242637634277344, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -20.241653442382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07573623955249786, + "rewards_train/margins": 1.1124015599489212, + "rewards_train/rejected": -1.0366653203964233, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -97.7471694946289, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -171.8365478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7247170209884644, + "rewards_train/margins": 2.3589378595352173, + "rewards_train/rejected": -4.083654880523682, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -165.12469482421875, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -142.39401245117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.912469387054443, + "rewards_train/margins": -0.37306785583496094, + "rewards_train/rejected": -4.539401531219482, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -14.95071029663086, + "logps_train/ref_chosen": -0.318359375, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -16.96259117126465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4632351398468018, + "rewards_train/margins": 0.014274001121520996, + "rewards_train/rejected": -1.4775091409683228, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -173.89453125, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -200.0, + "logps_train/rejected": -214.0691375732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.510546863079071, + "rewards_train/margins": 1.9174606204032898, + "rewards_train/rejected": -1.4069137573242188, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -115.97274780273438, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -160.58929443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.197274923324585, + "rewards_train/margins": 0.2616546154022217, + "rewards_train/rejected": -2.4589295387268066, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -68.3608627319336, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -113.96533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2860862910747528, + "rewards_train/margins": 2.660446912050247, + "rewards_train/rejected": -2.946533203125, + "step": 687 + }, + { + "epoch": 0.19, + "logps_train/chosen": -7.288909912109375, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -19.03554344177246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36795350909233093, + "rewards_train/margins": 0.03560084104537964, + "rewards_train/rejected": -0.40355435013771057, + "step": 687 + }, + { + "epoch": 0.19, + "learning_rate": 1.7352758039357708e-06, + "loss": 0.582, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -1.424691915512085, + "logps_train/ref_chosen": -0.87109375, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -8.404518127441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.055359818041324615, + "rewards_train/margins": 0.40852951258420944, + "rewards_train/rejected": -0.46388933062553406, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -35.407962799072266, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -3.6875, + "logps_train/rejected": -10.954154014587402, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29079627990722656, + "rewards_train/margins": 0.43586915731430054, + "rewards_train/rejected": -0.7266654372215271, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -104.88134765625, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -126.05619812011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8881348371505737, + "rewards_train/margins": 0.817484974861145, + "rewards_train/rejected": -2.7056198120117188, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -0.4165373742580414, + "logps_train/ref_chosen": -0.439453125, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -3.5978872776031494, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002291575074195862, + "rewards_train/margins": 0.030830303207039833, + "rewards_train/rejected": -0.02853872813284397, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -103.01920318603516, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -177.30572509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5519203543663025, + "rewards_train/margins": 4.8786521553993225, + "rewards_train/rejected": -5.430572509765625, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -95.24105072021484, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -140.03704833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6241050958633423, + "rewards_train/margins": 2.5795997381210327, + "rewards_train/rejected": -3.203704833984375, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -6.4248366355896, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -20.039737701416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10810866206884384, + "rewards_train/margins": 0.23336512595415115, + "rewards_train/rejected": -0.341473788022995, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -55.54509353637695, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -96.83644104003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020490646362304688, + "rewards_train/margins": 1.4541348218917847, + "rewards_train/rejected": -1.43364417552948, + "step": 688 + }, + { + "epoch": 0.19, + "logps_train/chosen": -54.31208419799805, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -123.73894500732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3812084197998047, + "rewards_train/margins": 3.442686080932617, + "rewards_train/rejected": -3.823894500732422, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -12.563628196716309, + "logps_train/ref_chosen": -0.5390625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -9.30804443359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2024565935134888, + "rewards_train/margins": -1.3716521561145782, + "rewards_train/rejected": 0.16919556260108948, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -81.4711685180664, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -162.04348754882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4471168518066406, + "rewards_train/margins": 5.007232189178467, + "rewards_train/rejected": -6.454349040985107, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -105.78546142578125, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -106.16931915283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12854614853858948, + "rewards_train/margins": 0.03838576376438141, + "rewards_train/rejected": -0.1669319123029709, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -26.40917205810547, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -45.75, + "logps_train/rejected": -79.20226287841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8034172058105469, + "rewards_train/margins": 2.54180908203125, + "rewards_train/rejected": -3.345226287841797, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.305424690246582, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -18.02410888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2257075309753418, + "rewards_train/margins": 0.8343684077262878, + "rewards_train/rejected": -0.608660876750946, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -109.99308776855469, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -95.51545715332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7993087768554688, + "rewards_train/margins": 0.8522369861602783, + "rewards_train/rejected": -1.651545763015747, + "step": 689 + }, + { + "epoch": 0.19, + "logps_train/chosen": -104.72478485107422, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -214.95733642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.522478461265564, + "rewards_train/margins": 6.273255467414856, + "rewards_train/rejected": -7.79573392868042, + "step": 689 + }, + { + "epoch": 0.19, + "learning_rate": 1.7334801626024397e-06, + "loss": 0.3768, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -31.09044075012207, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -29.961570739746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16595593094825745, + "rewards_train/margins": 1.0996130406856537, + "rewards_train/rejected": -0.9336571097373962, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -109.27800750732422, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -133.47686767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2778007984161377, + "rewards_train/margins": 2.6698861122131348, + "rewards_train/rejected": -3.9476869106292725, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -145.896728515625, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -222.49215698242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6396729946136475, + "rewards_train/margins": 1.8095428943634033, + "rewards_train/rejected": -5.449215888977051, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.305541038513184, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -38.51326370239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4243040978908539, + "rewards_train/margins": 1.552022248506546, + "rewards_train/rejected": -1.9763263463974, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -128.2725830078125, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -181.4446563720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0272583961486816, + "rewards_train/margins": 5.117207050323486, + "rewards_train/rejected": -8.144465446472168, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -9.240886688232422, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -27.05788803100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.024088669568300247, + "rewards_train/margins": 0.8192001692950726, + "rewards_train/rejected": -0.8432888388633728, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -130.0241241455078, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -109.90232849121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.452412486076355, + "rewards_train/margins": 0.4878203868865967, + "rewards_train/rejected": -1.9402328729629517, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -168.28591918945312, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -152.61595153808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7285919189453125, + "rewards_train/margins": 1.9830033779144287, + "rewards_train/rejected": -3.711595296859741, + "step": 690 + }, + { + "epoch": 0.19, + "logps_train/chosen": -20.63507652282715, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -28.464109420776367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6635076403617859, + "rewards_train/margins": 0.7141533493995667, + "rewards_train/rejected": -1.3776609897613525, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -100.04955291748047, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -164.58099365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.504955291748047, + "rewards_train/margins": 2.203144073486328, + "rewards_train/rejected": -4.708099365234375, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -30.17351722717285, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -60.66929244995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7798517346382141, + "rewards_train/margins": 0.3870775103569031, + "rewards_train/rejected": -1.1669292449951172, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -140.933349609375, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -169.86293029785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7933349609375, + "rewards_train/margins": 0.79295814037323, + "rewards_train/rejected": -1.58629310131073, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -96.29396057128906, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -163.57008361816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.229396104812622, + "rewards_train/margins": 4.877612352371216, + "rewards_train/rejected": -6.107008457183838, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -4.067940711975098, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -5.234860897064209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10366906970739365, + "rewards_train/margins": 0.2651295140385628, + "rewards_train/rejected": -0.3687985837459564, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -22.368309020996094, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -32.90369415283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10066910088062286, + "rewards_train/margins": 1.453538492321968, + "rewards_train/rejected": -1.3528693914413452, + "step": 691 + }, + { + "epoch": 0.19, + "logps_train/chosen": -23.619848251342773, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -41.948307037353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.024484826251864433, + "rewards_train/margins": 2.7078459728509188, + "rewards_train/rejected": -2.732330799102783, + "step": 691 + }, + { + "epoch": 0.19, + "learning_rate": 1.7316793876856359e-06, + "loss": 0.2452, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -80.99797821044922, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -86.79924011230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3497978150844574, + "rewards_train/margins": 0.23012623190879822, + "rewards_train/rejected": -0.5799240469932556, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -67.02206420898438, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -90.54623413085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.302206426858902, + "rewards_train/margins": 0.4524169862270355, + "rewards_train/rejected": -0.7546234130859375, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -122.42472839355469, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -177.21047973632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3924728631973267, + "rewards_train/margins": 2.9285753965377808, + "rewards_train/rejected": -4.321048259735107, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -116.1169204711914, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -183.17408752441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9616920351982117, + "rewards_train/margins": 3.8557170033454895, + "rewards_train/rejected": -4.817409038543701, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -196.402099609375, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -192.5635986328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.140210151672363, + "rewards_train/margins": -1.2338504791259766, + "rewards_train/rejected": -8.906359672546387, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -12.816001892089844, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -10.04693603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20035019516944885, + "rewards_train/margins": 0.01996840536594391, + "rewards_train/rejected": -0.22031860053539276, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -160.32650756835938, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -191.26991271972656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.032650947570801, + "rewards_train/margins": -0.8056595325469971, + "rewards_train/rejected": -3.2269914150238037, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -4.749325275421143, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -7.764340400695801, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21243253350257874, + "rewards_train/margins": 0.2405640184879303, + "rewards_train/rejected": -0.45299655199050903, + "step": 692 + }, + { + "epoch": 0.19, + "logps_train/chosen": -2.87286376953125, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -38.628623962402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01853637769818306, + "rewards_train/margins": 0.044326018542051315, + "rewards_train/rejected": -0.06286239624023438, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -80.80284118652344, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -191.536376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01971588097512722, + "rewards_train/margins": 1.573353623971343, + "rewards_train/rejected": -1.5536377429962158, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -23.102191925048828, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -43.488895416259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07728081196546555, + "rewards_train/margins": 1.001170389354229, + "rewards_train/rejected": -0.9238895773887634, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -89.79869079589844, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -105.22657012939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9298691153526306, + "rewards_train/margins": 0.8427879214286804, + "rewards_train/rejected": -1.772657036781311, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -126.58682250976562, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -161.99786376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0086822509765625, + "rewards_train/margins": 2.091104030609131, + "rewards_train/rejected": -4.099786281585693, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -79.3121337890625, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -193.21560668945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7312134504318237, + "rewards_train/margins": 7.2403470277786255, + "rewards_train/rejected": -8.97156047821045, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -42.62223815917969, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -32.7457275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1372238397598267, + "rewards_train/margins": 0.01234889030456543, + "rewards_train/rejected": -1.149572730064392, + "step": 693 + }, + { + "epoch": 0.19, + "logps_train/chosen": -82.27452087402344, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -161.13619995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5274521112442017, + "rewards_train/margins": 5.236167788505554, + "rewards_train/rejected": -6.763619899749756, + "step": 693 + }, + { + "epoch": 0.19, + "learning_rate": 1.7298734917888748e-06, + "loss": 0.4634, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -189.60467529296875, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -226.7122802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.760467529296875, + "rewards_train/margins": 0.010760784149169922, + "rewards_train/rejected": -5.771228313446045, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -156.63113403320312, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -195.64694213867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1631133556365967, + "rewards_train/margins": 1.401580810546875, + "rewards_train/rejected": -3.5646941661834717, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -117.15052795410156, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -122.85126495361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7650527954101562, + "rewards_train/margins": 0.8200737237930298, + "rewards_train/rejected": -1.585126519203186, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -38.9150505065918, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -73.61923217773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36650505661964417, + "rewards_train/margins": 2.770418256521225, + "rewards_train/rejected": -3.136923313140869, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -15.14482593536377, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -34.10651397705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03551740571856499, + "rewards_train/margins": 0.846168827265501, + "rewards_train/rejected": -0.810651421546936, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -85.34938049316406, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -128.70159912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.134938046336174, + "rewards_train/margins": 2.0852219611406326, + "rewards_train/rejected": -2.2201600074768066, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -117.83200073242188, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -147.32235717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8832000494003296, + "rewards_train/margins": 0.14903581142425537, + "rewards_train/rejected": -2.032235860824585, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -38.883087158203125, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -29.559316635131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5883087515830994, + "rewards_train/margins": 0.5676229596138, + "rewards_train/rejected": -1.1559317111968994, + "step": 694 + }, + { + "epoch": 0.19, + "logps_train/chosen": -47.43596649169922, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -93.14058685302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6435966491699219, + "rewards_train/margins": 1.2954621315002441, + "rewards_train/rejected": -2.939058780670166, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -200.9825439453125, + "logps_train/ref_chosen": -194.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -153.33572387695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.698254406452179, + "rewards_train/margins": 1.9353179335594177, + "rewards_train/rejected": -2.6335723400115967, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -2.1511359214782715, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -6.953456878662109, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0010510921711102128, + "rewards_train/margins": -0.011955404304899275, + "rewards_train/rejected": 0.010904312133789062, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -85.75776672363281, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -98.73381042480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9257766604423523, + "rewards_train/margins": 0.3476044535636902, + "rewards_train/rejected": -1.2733811140060425, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -26.203449249267578, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -28.14122200012207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2578449249267578, + "rewards_train/margins": 0.6312772631645203, + "rewards_train/rejected": -0.8891221880912781, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -8.753596305847168, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -21.422603607177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08714037388563156, + "rewards_train/margins": 0.723150722682476, + "rewards_train/rejected": -0.6360103487968445, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -3.643247127532959, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -13.313468933105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12317528575658798, + "rewards_train/margins": 0.49202217906713486, + "rewards_train/rejected": -0.3688468933105469, + "step": 695 + }, + { + "epoch": 0.19, + "logps_train/chosen": -9.822779655456543, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -9.113279342651367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21352796256542206, + "rewards_train/margins": -0.0459500253200531, + "rewards_train/rejected": -0.16757793724536896, + "step": 695 + }, + { + "epoch": 0.19, + "learning_rate": 1.7280624875515129e-06, + "loss": 0.4066, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -15.783411026000977, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -101.1140365600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0158411264419556, + "rewards_train/margins": 0.6455625295639038, + "rewards_train/rejected": -1.6614036560058594, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -23.088916778564453, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -18.539201736450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2463916838169098, + "rewards_train/margins": 0.4325285255908966, + "rewards_train/rejected": -0.6789202094078064, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -104.8179702758789, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -64.6015853881836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8317970037460327, + "rewards_train/margins": -0.6716384887695312, + "rewards_train/rejected": -1.1601585149765015, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -10.93828010559082, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -0.376953125, + "logps_train/rejected": -2.3517274856567383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8547655344009399, + "rewards_train/margins": -0.6572880893945694, + "rewards_train/rejected": -0.19747744500637054, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -134.2162628173828, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -206.41270446777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0216264724731445, + "rewards_train/margins": 3.969644069671631, + "rewards_train/rejected": -7.991270542144775, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -14.32198715209961, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -17.9239444732666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011551285162568092, + "rewards_train/margins": 0.5414457563310862, + "rewards_train/rejected": -0.5298944711685181, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -9.780309677124023, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -13.060110092163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1157190352678299, + "rewards_train/margins": 0.9436050802469254, + "rewards_train/rejected": -0.8278860449790955, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -0.6786063313484192, + "logps_train/ref_chosen": -0.76171875, + "logps_train/ref_rejected": -0.76171875, + "logps_train/rejected": -0.6599110960960388, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008311241865158081, + "rewards_train/margins": -0.001869523897767067, + "rewards_train/rejected": 0.010180765762925148, + "step": 696 + }, + { + "epoch": 0.19, + "logps_train/chosen": -5.195804595947266, + "logps_train/ref_chosen": -1.9765625, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -10.963647842407227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32192420959472656, + "rewards_train/margins": 0.457253098487854, + "rewards_train/rejected": -0.7791773080825806, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -7.820107460021973, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -7.693710803985596, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06638574600219727, + "rewards_train/margins": 0.4576728343963623, + "rewards_train/rejected": -0.5240585803985596, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -71.75602722167969, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -117.8072509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0006027221679688, + "rewards_train/margins": 0.38012242317199707, + "rewards_train/rejected": -1.3807251453399658, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -82.08850860595703, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -119.9928207397461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8588508367538452, + "rewards_train/margins": 1.7904313802719116, + "rewards_train/rejected": -3.649282217025757, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -154.89535522460938, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -191.96841430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8895355463027954, + "rewards_train/margins": 3.107305884361267, + "rewards_train/rejected": -3.9968414306640625, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -143.9133758544922, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -179.71310424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1913375854492188, + "rewards_train/margins": 1.579972743988037, + "rewards_train/rejected": -4.771310329437256, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -131.64816284179688, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -155.9949493408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8148162961006165, + "rewards_train/margins": 1.584678590297699, + "rewards_train/rejected": -2.3994948863983154, + "step": 697 + }, + { + "epoch": 0.19, + "logps_train/chosen": -1.9143469333648682, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.035717010498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17731530964374542, + "rewards_train/margins": 0.1527620106935501, + "rewards_train/rejected": 0.024553298950195312, + "step": 697 + }, + { + "epoch": 0.2, + "learning_rate": 1.7262463876486594e-06, + "loss": 0.4549, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -40.541011810302734, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -79.90895080566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5791012048721313, + "rewards_train/margins": 0.26179391145706177, + "rewards_train/rejected": -0.8408951163291931, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -26.54220199584961, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -19.654071807861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17922019958496094, + "rewards_train/margins": 0.6549369692802429, + "rewards_train/rejected": -0.8341571688652039, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -181.4656219482422, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -257.1748352050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9465622901916504, + "rewards_train/margins": 5.670921802520752, + "rewards_train/rejected": -9.617484092712402, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -71.79702758789062, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -90.48626708984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.579702854156494, + "rewards_train/margins": -0.33107614517211914, + "rewards_train/rejected": -3.248626708984375, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -4.229071617126465, + "logps_train/ref_chosen": -1.921875, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -7.9437713623046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2307196706533432, + "rewards_train/margins": -0.0019675344228744507, + "rewards_train/rejected": -0.22875213623046875, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -184.9392852783203, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -169.2484893798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9939285516738892, + "rewards_train/margins": 2.3309205770492554, + "rewards_train/rejected": -4.3248491287231445, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -127.12167358398438, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -190.39389038085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4121673107147217, + "rewards_train/margins": 4.627222299575806, + "rewards_train/rejected": -8.039389610290527, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -23.10773468017578, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -35.53711700439453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9170234799385071, + "rewards_train/margins": -0.1508117914199829, + "rewards_train/rejected": -0.7662116885185242, + "step": 698 + }, + { + "epoch": 0.2, + "logps_train/chosen": -162.9500732421875, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -190.69921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.19500732421875, + "rewards_train/margins": -0.12508535385131836, + "rewards_train/rejected": -2.0699219703674316, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -18.588607788085938, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -35.01890182495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5026108026504517, + "rewards_train/margins": 0.07427936792373657, + "rewards_train/rejected": -0.5768901705741882, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -31.804290771484375, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -28.563331604003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1054290533065796, + "rewards_train/margins": -0.46159589290618896, + "rewards_train/rejected": -0.6438331604003906, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -74.52031707763672, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -196.70704650878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002031707903370261, + "rewards_train/margins": 7.218672847608104, + "rewards_train/rejected": -7.220704555511475, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -6.3094940185546875, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -5.605798721313477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06280060112476349, + "rewards_train/margins": 0.18900547921657562, + "rewards_train/rejected": -0.12620487809181213, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -12.851262092590332, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -23.200538635253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0663762092590332, + "rewards_train/margins": 0.3911776542663574, + "rewards_train/rejected": -0.4575538635253906, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -190.58811950683594, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -200.38092041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.458812236785889, + "rewards_train/margins": 1.6792798042297363, + "rewards_train/rejected": -6.138092041015625, + "step": 699 + }, + { + "epoch": 0.2, + "logps_train/chosen": -31.148601531982422, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -40.86858367919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7523601651191711, + "rewards_train/margins": 0.08449822664260864, + "rewards_train/rejected": -0.8368583917617798, + "step": 699 + }, + { + "epoch": 0.2, + "learning_rate": 1.724425204791089e-06, + "loss": 0.4838, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -54.688140869140625, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -12.106183052062988, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5688140988349915, + "rewards_train/margins": -0.19569578766822815, + "rewards_train/rejected": -0.3731183111667633, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -121.06129455566406, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -134.82452392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2561294734477997, + "rewards_train/margins": 1.6263228952884674, + "rewards_train/rejected": -1.882452368736267, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -1.3059676885604858, + "logps_train/ref_chosen": -0.53515625, + "logps_train/ref_rejected": -0.53515625, + "logps_train/rejected": -1.336280345916748, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07708114385604858, + "rewards_train/margins": 0.0030312687158584595, + "rewards_train/rejected": -0.08011241257190704, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -148.54095458984375, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -277.28131103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.30409574508667, + "rewards_train/margins": 9.424035549163818, + "rewards_train/rejected": -13.728131294250488, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -22.363393783569336, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -22.738794326782227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8988394141197205, + "rewards_train/margins": 0.3125399947166443, + "rewards_train/rejected": -1.2113794088363647, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.663710594177246, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -1.046875, + "logps_train/rejected": -2.4751226902008057, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2944960594177246, + "rewards_train/margins": -0.15167129039764404, + "rewards_train/rejected": -0.14282476902008057, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -17.191631317138672, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -25.111696243286133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6629131436347961, + "rewards_train/margins": -0.1767435073852539, + "rewards_train/rejected": -0.48616963624954224, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -159.40003967285156, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -181.9394073486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.240004062652588, + "rewards_train/margins": 0.20393657684326172, + "rewards_train/rejected": -5.44394063949585, + "step": 700 + }, + { + "epoch": 0.2, + "logps_train/chosen": -78.10057067871094, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -94.96440124511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9600570797920227, + "rewards_train/margins": 0.9863830208778381, + "rewards_train/rejected": -1.9464401006698608, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -7.670321464538574, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -13.770812034606934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17046785354614258, + "rewards_train/margins": 0.44129905104637146, + "rewards_train/rejected": -0.2708311975002289, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -115.4415283203125, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -131.63287353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5441528558731079, + "rewards_train/margins": 1.7191344499588013, + "rewards_train/rejected": -2.263287305831909, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -209.32415771484375, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -225.56689453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.932415962219238, + "rewards_train/margins": -0.07572650909423828, + "rewards_train/rejected": -6.856689453125, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -92.06527709960938, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -101.31683349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6065277457237244, + "rewards_train/margins": 0.8751556277275085, + "rewards_train/rejected": -1.481683373451233, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -21.559436798095703, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -9.841843605041504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2934436798095703, + "rewards_train/margins": 0.370428204536438, + "rewards_train/rejected": -0.6638718843460083, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -13.048236846923828, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -35.89567184448242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017323685809969902, + "rewards_train/margins": 0.4847435224801302, + "rewards_train/rejected": -0.5020672082901001, + "step": 701 + }, + { + "epoch": 0.2, + "logps_train/chosen": -70.59810638427734, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -80.89463806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20981064438819885, + "rewards_train/margins": 1.829653114080429, + "rewards_train/rejected": -2.039463758468628, + "step": 701 + }, + { + "epoch": 0.2, + "learning_rate": 1.7225989517251497e-06, + "loss": 0.4742, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -128.33184814453125, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -139.66070556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6831848621368408, + "rewards_train/margins": 0.6828856468200684, + "rewards_train/rejected": -2.366070508956909, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -80.45455932617188, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -148.13290405273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2954559326171875, + "rewards_train/margins": 0.5178345441818237, + "rewards_train/rejected": -1.8132904767990112, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.756819725036621, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -12.204069137573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0006819725385867059, + "rewards_train/margins": 0.9072249769815244, + "rewards_train/rejected": -0.9079069495201111, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -43.11668395996094, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -84.4487533569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5866684317588806, + "rewards_train/margins": 0.9582069516181946, + "rewards_train/rejected": -1.5448753833770752, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.6622849106788635, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -6.804623126983643, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05955275893211365, + "rewards_train/margins": 0.18689008057117462, + "rewards_train/rejected": -0.12733732163906097, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -23.804794311523438, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -43.664772033691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7054794430732727, + "rewards_train/margins": 0.43599778413772583, + "rewards_train/rejected": -1.1414772272109985, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -100.82916259765625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -115.42040252685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6829162836074829, + "rewards_train/margins": 0.10912400484085083, + "rewards_train/rejected": -0.7920402884483337, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -107.81414794921875, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -75.70394897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13141480088233948, + "rewards_train/margins": 0.338980108499527, + "rewards_train/rejected": -0.47039490938186646, + "step": 702 + }, + { + "epoch": 0.2, + "logps_train/chosen": -51.31292724609375, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -107.33411407470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.206292748451233, + "rewards_train/margins": 2.2271186113357544, + "rewards_train/rejected": -3.4334113597869873, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -83.68391418457031, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -234.70932006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2183914184570312, + "rewards_train/margins": 7.5525407791137695, + "rewards_train/rejected": -8.7709321975708, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -46.333133697509766, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -18.481685638427734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4833133816719055, + "rewards_train/margins": -0.06639480590820312, + "rewards_train/rejected": -0.4169185757637024, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -29.52476692199707, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -13.111218452453613, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.864976704120636, + "rewards_train/margins": 0.17114514112472534, + "rewards_train/rejected": -1.0361218452453613, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -149.96119689941406, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -153.658935546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6961197257041931, + "rewards_train/margins": -0.13022613525390625, + "rewards_train/rejected": -0.5658935904502869, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -125.67874145507812, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -159.4913787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8678741455078125, + "rewards_train/margins": 2.6812639236450195, + "rewards_train/rejected": -5.549138069152832, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -165.52439880371094, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -200.03282165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0524399280548096, + "rewards_train/margins": 5.050842046737671, + "rewards_train/rejected": -8.10328197479248, + "step": 703 + }, + { + "epoch": 0.2, + "logps_train/chosen": -9.116498947143555, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -11.914265632629395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12727490067481995, + "rewards_train/margins": 0.4204016625881195, + "rewards_train/rejected": -0.5476765632629395, + "step": 703 + }, + { + "epoch": 0.2, + "learning_rate": 1.720767641232678e-06, + "loss": 0.4123, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.544025421142578, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -49.31916046142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5544025301933289, + "rewards_train/margins": 2.1775136590003967, + "rewards_train/rejected": -2.7319161891937256, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -108.90072631835938, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -135.94708251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9400726556777954, + "rewards_train/margins": 2.454635739326477, + "rewards_train/rejected": -3.3947083950042725, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.361615180969238, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -5.2890944480896, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09866151958703995, + "rewards_train/margins": -0.14162707701325417, + "rewards_train/rejected": 0.04296555742621422, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -56.78038024902344, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -112.3663558959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12196197360754013, + "rewards_train/margins": 4.308597467839718, + "rewards_train/rejected": -4.186635494232178, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -112.69947052001953, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -95.95276641845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7199470400810242, + "rewards_train/margins": 2.875329554080963, + "rewards_train/rejected": -3.5952765941619873, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -139.51376342773438, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -227.01124572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3013763427734375, + "rewards_train/margins": 6.999748229980469, + "rewards_train/rejected": -10.301124572753906, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -56.60924530029297, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -44.79327392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18592453002929688, + "rewards_train/margins": 0.31840288639068604, + "rewards_train/rejected": -0.5043274164199829, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -14.672314643859863, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -29.11644172668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7422314882278442, + "rewards_train/margins": 0.2944127321243286, + "rewards_train/rejected": -1.0366442203521729, + "step": 704 + }, + { + "epoch": 0.2, + "logps_train/chosen": -172.1572265625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -242.41380310058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0157227516174316, + "rewards_train/margins": 6.225657939910889, + "rewards_train/rejected": -9.24138069152832, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -120.96244812011719, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -193.4532470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2462449073791504, + "rewards_train/margins": 4.499079704284668, + "rewards_train/rejected": -6.745324611663818, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -143.71018981933594, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -224.72299194335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9710190296173096, + "rewards_train/margins": 8.651280164718628, + "rewards_train/rejected": -10.622299194335938, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -104.17474365234375, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -143.2779083251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.317474365234375, + "rewards_train/margins": 2.4603166580200195, + "rewards_train/rejected": -4.7777910232543945, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -112.90736389160156, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -113.95114135742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.790736436843872, + "rewards_train/margins": 0.10437774658203125, + "rewards_train/rejected": -1.8951141834259033, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -56.4360237121582, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -54.46195983886719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018602371215820312, + "rewards_train/margins": -0.14740638434886932, + "rewards_train/rejected": 0.128804013133049, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -96.31311798095703, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -190.63848876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6313118934631348, + "rewards_train/margins": 5.882537364959717, + "rewards_train/rejected": -8.513849258422852, + "step": 705 + }, + { + "epoch": 0.2, + "logps_train/chosen": -42.36320877075195, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -14.171329498291016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1863209009170532, + "rewards_train/margins": -0.4504379630088806, + "rewards_train/rejected": -0.7358829379081726, + "step": 705 + }, + { + "epoch": 0.2, + "learning_rate": 1.7189312861309042e-06, + "loss": 0.2861, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -94.93124389648438, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -129.82553100585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8931244611740112, + "rewards_train/margins": 3.7394286394119263, + "rewards_train/rejected": -5.6325531005859375, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -107.95104217529297, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -108.02423095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6951043605804443, + "rewards_train/margins": 0.007318735122680664, + "rewards_train/rejected": -2.702423095703125, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -92.44005584716797, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -76.67320251464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9940056204795837, + "rewards_train/margins": 0.8483147025108337, + "rewards_train/rejected": -1.8423203229904175, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -20.839191436767578, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -10.05819320678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49016913771629333, + "rewards_train/margins": 0.05940017104148865, + "rewards_train/rejected": -0.549569308757782, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -93.55317687988281, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -168.59686279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6553176641464233, + "rewards_train/margins": 1.2043687105178833, + "rewards_train/rejected": -2.8596863746643066, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -18.114044189453125, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -26.844844818115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4239044189453125, + "rewards_train/margins": 0.4980800747871399, + "rewards_train/rejected": -0.9219844937324524, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -191.55853271484375, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -199.1853790283203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.755853176116943, + "rewards_train/margins": -0.8373150825500488, + "rewards_train/rejected": -4.9185380935668945, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -15.34048843383789, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -27.986522674560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.565298855304718, + "rewards_train/margins": 0.6333534121513367, + "rewards_train/rejected": -1.1986522674560547, + "step": 706 + }, + { + "epoch": 0.2, + "logps_train/chosen": -66.06451416015625, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -93.78128051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10645141452550888, + "rewards_train/margins": 1.4216766133904457, + "rewards_train/rejected": -1.5281280279159546, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -142.24267578125, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -241.7461395263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.624267578125, + "rewards_train/margins": 6.650346755981445, + "rewards_train/rejected": -10.274614334106445, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -161.94813537597656, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -253.0484619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4948135316371918, + "rewards_train/margins": 4.7100328505039215, + "rewards_train/rejected": -5.204846382141113, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.587130069732666, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -8.295687675476074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13816200196743011, + "rewards_train/margins": 0.21773076802492142, + "rewards_train/rejected": -0.0795687660574913, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -120.49934387207031, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -213.55429077148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.949934482574463, + "rewards_train/margins": 4.805494785308838, + "rewards_train/rejected": -8.7554292678833, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -230.9901580810547, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -243.89202880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.499015808105469, + "rewards_train/margins": 0.8901872634887695, + "rewards_train/rejected": -7.389203071594238, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -96.74851989746094, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -107.5504150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6748520135879517, + "rewards_train/margins": 0.5301895141601562, + "rewards_train/rejected": -1.205041527748108, + "step": 707 + }, + { + "epoch": 0.2, + "logps_train/chosen": -134.17616271972656, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -166.72674560546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.417616367340088, + "rewards_train/margins": -0.44494175910949707, + "rewards_train/rejected": -3.972674608230591, + "step": 707 + }, + { + "epoch": 0.2, + "learning_rate": 1.7170898992723685e-06, + "loss": 0.4166, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -133.04063415527344, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -288.2153625488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.354063510894775, + "rewards_train/margins": 11.167473316192627, + "rewards_train/rejected": -15.521536827087402, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -7.258096218109131, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -18.827661514282227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016434622928500175, + "rewards_train/margins": 0.14133152551949024, + "rewards_train/rejected": -0.15776614844799042, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -66.78678894042969, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -56.83584213256836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.703678846359253, + "rewards_train/margins": -0.2200946807861328, + "rewards_train/rejected": -2.48358416557312, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.2364330291748047, + "logps_train/ref_chosen": -0.435546875, + "logps_train/ref_rejected": -0.435546875, + "logps_train/rejected": -0.23131652176380157, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0199113842099905, + "rewards_train/margins": -0.0005116518586874008, + "rewards_train/rejected": 0.020423036068677902, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -184.12559509277344, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -217.88253784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4125595092773438, + "rewards_train/margins": 4.375694274902344, + "rewards_train/rejected": -5.7882537841796875, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -143.824462890625, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -92.63465118408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.732446312904358, + "rewards_train/margins": 0.3310188055038452, + "rewards_train/rejected": -2.063465118408203, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -142.24118041992188, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -162.21697998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7741180658340454, + "rewards_train/margins": 1.0475798845291138, + "rewards_train/rejected": -2.821697950363159, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -128.35061645507812, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -157.77377319335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9850616455078125, + "rewards_train/margins": 3.4923157691955566, + "rewards_train/rejected": -6.477377414703369, + "step": 708 + }, + { + "epoch": 0.2, + "logps_train/chosen": -26.201107025146484, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -63.08787536621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9576107263565063, + "rewards_train/margins": 0.9761768579483032, + "rewards_train/rejected": -1.9337875843048096, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -193.87582397460938, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -184.7366180419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.887582540512085, + "rewards_train/margins": 1.2860791683197021, + "rewards_train/rejected": -5.173661708831787, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.611135482788086, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -5.6451544761657715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07951145619153976, + "rewards_train/margins": 0.0034019052982330322, + "rewards_train/rejected": 0.07610955089330673, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -81.02749633789062, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -78.04210662841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3027496337890625, + "rewards_train/margins": 0.8514610528945923, + "rewards_train/rejected": -1.1542106866836548, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -83.6227035522461, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -149.1422119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0372703075408936, + "rewards_train/margins": 1.9769508838653564, + "rewards_train/rejected": -4.01422119140625, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -156.87896728515625, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -179.2638397216797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.087896823883057, + "rewards_train/margins": -0.9615128040313721, + "rewards_train/rejected": -3.1263840198516846, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -3.9675991535186768, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -7.388067722320557, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018634915351867676, + "rewards_train/margins": 0.24204686284065247, + "rewards_train/rejected": -0.26068177819252014, + "step": 709 + }, + { + "epoch": 0.2, + "logps_train/chosen": -26.3311824798584, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -23.136499404907227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23311825096607208, + "rewards_train/margins": -0.06946830451488495, + "rewards_train/rejected": -0.16364994645118713, + "step": 709 + }, + { + "epoch": 0.2, + "learning_rate": 1.7152434935448254e-06, + "loss": 0.459, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -7.0816755294799805, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -25.032094955444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24879255890846252, + "rewards_train/margins": 0.5669169723987579, + "rewards_train/rejected": -0.8157095313072205, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -203.1328887939453, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -207.02455139160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.613288879394531, + "rewards_train/margins": -1.2108335494995117, + "rewards_train/rejected": -9.40245532989502, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.7738423347473145, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -17.354143142700195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07113423198461533, + "rewards_train/margins": 1.0142800584435463, + "rewards_train/rejected": -1.0854142904281616, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -75.48981475830078, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -58.61033630371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09898147732019424, + "rewards_train/margins": 0.6870521530508995, + "rewards_train/rejected": -0.7860336303710938, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -113.35568237304688, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -58.136070251464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2355682849884033, + "rewards_train/margins": -0.6969612836837769, + "rewards_train/rejected": -1.5386070013046265, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.122785568237305, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -41.727874755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20647144317626953, + "rewards_train/margins": 0.9292589426040649, + "rewards_train/rejected": -0.7227874994277954, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -7.432162761688232, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -6.111464500427246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19321627914905548, + "rewards_train/margins": 0.09605516493320465, + "rewards_train/rejected": -0.28927144408226013, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -173.171630859375, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -218.83334350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.167163372039795, + "rewards_train/margins": 3.6661715507507324, + "rewards_train/rejected": -10.833334922790527, + "step": 710 + }, + { + "epoch": 0.2, + "logps_train/chosen": -4.054731369018555, + "logps_train/ref_chosen": -2.71875, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -22.15350341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13359813392162323, + "rewards_train/margins": 0.2942522019147873, + "rewards_train/rejected": -0.4278503358364105, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.904568672180176, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -17.142183303833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08298063278198242, + "rewards_train/margins": 0.8346989750862122, + "rewards_train/rejected": -0.7517183423042297, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -11.951929092407227, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -8.515827178955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25144290924072266, + "rewards_train/margins": 0.03138980269432068, + "rewards_train/rejected": -0.28283271193504333, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -23.64959144592285, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -144.909423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4399591386318207, + "rewards_train/margins": 2.500983387231827, + "rewards_train/rejected": -2.9409425258636475, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -104.89073181152344, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -301.3652648925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2890732288360596, + "rewards_train/margins": 15.54745364189148, + "rewards_train/rejected": -16.83652687072754, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -141.38656616210938, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -209.78128051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.538656711578369, + "rewards_train/margins": 1.4394712448120117, + "rewards_train/rejected": -5.978127956390381, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -138.53135681152344, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -198.83163452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.853135824203491, + "rewards_train/margins": 4.230027914047241, + "rewards_train/rejected": -7.083163738250732, + "step": 711 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.1239986419677734, + "logps_train/ref_chosen": -1.8828125, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -9.266687393188477, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.024118615314364433, + "rewards_train/margins": -0.13494987599551678, + "rewards_train/rejected": 0.11083126068115234, + "step": 711 + }, + { + "epoch": 0.2, + "learning_rate": 1.7133920818711583e-06, + "loss": 0.4626, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -228.02587890625, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -227.97970581054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.002588272094727, + "rewards_train/margins": -1.7046175003051758, + "rewards_train/rejected": -7.297970771789551, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -110.78684997558594, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -164.83465576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4786850214004517, + "rewards_train/margins": 2.8547805547714233, + "rewards_train/rejected": -4.333465576171875, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.12096118927002, + "logps_train/ref_chosen": -6.90625, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -12.541813850402832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12147112190723419, + "rewards_train/margins": -0.07353973761200905, + "rewards_train/rejected": -0.04793138429522514, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.465087890625, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -7.891438961029053, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23088379204273224, + "rewards_train/margins": 0.24576009809970856, + "rewards_train/rejected": -0.4766438901424408, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.513624668121338, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -17.855321884155273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1544874757528305, + "rewards_train/margins": 0.5372947007417679, + "rewards_train/rejected": -0.6917821764945984, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -28.542572021484375, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -25.16596221923828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9042572379112244, + "rewards_train/margins": -0.03766101598739624, + "rewards_train/rejected": -0.8665962219238281, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.278555870056152, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -16.89173126220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8075430989265442, + "rewards_train/margins": 0.30663007497787476, + "rewards_train/rejected": -1.114173173904419, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -33.05758285522461, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -56.02149200439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.518258273601532, + "rewards_train/margins": 1.4338909983634949, + "rewards_train/rejected": -1.9521492719650269, + "step": 712 + }, + { + "epoch": 0.2, + "logps_train/chosen": -55.9505615234375, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -40.5923957824707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.770056128501892, + "rewards_train/margins": 0.06418347358703613, + "rewards_train/rejected": -1.8342396020889282, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.32040023803711, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -12.817782402038574, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41954001784324646, + "rewards_train/margins": 0.37473824620246887, + "rewards_train/rejected": -0.7942782640457153, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -131.69256591796875, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -179.07594299316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.969256639480591, + "rewards_train/margins": 4.938337564468384, + "rewards_train/rejected": -7.907594203948975, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -41.43579864501953, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -28.176353454589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7685798406600952, + "rewards_train/margins": -0.6384444236755371, + "rewards_train/rejected": -1.130135416984558, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -110.56529235839844, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -139.56954956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0065293312072754, + "rewards_train/margins": 1.0004255771636963, + "rewards_train/rejected": -3.0069549083709717, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.826014518737793, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -21.83440399169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3076014518737793, + "rewards_train/margins": 0.6758389472961426, + "rewards_train/rejected": -0.9834403991699219, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -156.1934814453125, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -158.88905334472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.01934814453125, + "rewards_train/margins": 1.269557237625122, + "rewards_train/rejected": -2.288905382156372, + "step": 713 + }, + { + "epoch": 0.2, + "logps_train/chosen": -80.38650512695312, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -80.07247924804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8136504888534546, + "rewards_train/margins": -0.031402587890625, + "rewards_train/rejected": -1.7822479009628296, + "step": 713 + }, + { + "epoch": 0.2, + "learning_rate": 1.7115356772092855e-06, + "loss": 0.5693, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -97.59822082519531, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -56.35589599609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4098220765590668, + "rewards_train/margins": -0.19923247396945953, + "rewards_train/rejected": -0.21058960258960724, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -64.63127136230469, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -110.92652130126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6131271719932556, + "rewards_train/margins": 2.6795249581336975, + "rewards_train/rejected": -3.292652130126953, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -90.996337890625, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -221.10919189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7496337890625, + "rewards_train/margins": 7.911285400390625, + "rewards_train/rejected": -9.660919189453125, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -3.234929323196411, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -19.66898536682129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04369456693530083, + "rewards_train/margins": 0.23559310659766197, + "rewards_train/rejected": -0.19189853966236115, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -168.99473571777344, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -200.29660034179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.299473524093628, + "rewards_train/margins": 1.8301866054534912, + "rewards_train/rejected": -4.129660129547119, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -29.73923683166504, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -25.052391052246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5114237070083618, + "rewards_train/margins": 0.9313154220581055, + "rewards_train/rejected": -1.4427391290664673, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -12.501590728759766, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -5.3564534187316895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012659072875976562, + "rewards_train/margins": 0.2886112630367279, + "rewards_train/rejected": -0.30127033591270447, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -124.02093505859375, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -156.29949951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.602093458175659, + "rewards_train/margins": 2.727856397628784, + "rewards_train/rejected": -5.329949855804443, + "step": 714 + }, + { + "epoch": 0.2, + "logps_train/chosen": -129.3288116455078, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -229.53659057617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.532881259918213, + "rewards_train/margins": 9.320778369903564, + "rewards_train/rejected": -11.853659629821777, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.04878044128418, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -60.130706787109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3673780560493469, + "rewards_train/margins": 1.6206926703453064, + "rewards_train/rejected": -1.9880707263946533, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -112.30243682861328, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -89.64617919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.430243730545044, + "rewards_train/margins": 0.3843742609024048, + "rewards_train/rejected": -1.8146179914474487, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -11.810970306396484, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -51.067047119140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6279720664024353, + "rewards_train/margins": -0.1462673544883728, + "rewards_train/rejected": -0.4817047119140625, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -53.146568298339844, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -53.19994354248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7146568298339844, + "rewards_train/margins": 0.005337536334991455, + "rewards_train/rejected": -0.7199943661689758, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.436236381530762, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -19.497695922851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3498736321926117, + "rewards_train/margins": 0.5498959720134735, + "rewards_train/rejected": -0.8997696042060852, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -163.0009765625, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -178.37701416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0000977516174316, + "rewards_train/margins": 4.787603855133057, + "rewards_train/rejected": -6.787701606750488, + "step": 715 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.50558090209961, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -7.277987480163574, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.578683078289032, + "rewards_train/margins": -0.4196343272924423, + "rewards_train/rejected": -0.15904875099658966, + "step": 715 + }, + { + "epoch": 0.2, + "learning_rate": 1.7096742925520711e-06, + "loss": 0.3813, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.014755249023438, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -14.35482120513916, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1327255219221115, + "rewards_train/margins": -0.15974340215325356, + "rewards_train/rejected": 0.027017880231142044, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -106.32525634765625, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -100.47981262207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03252563625574112, + "rewards_train/margins": 2.51545562595129, + "rewards_train/rejected": -2.5479812622070312, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -22.59447479248047, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -17.2315673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7219474911689758, + "rewards_train/margins": 0.08245927095413208, + "rewards_train/rejected": -0.8044067621231079, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -166.82113647460938, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -170.59397888183594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.482113838195801, + "rewards_train/margins": -0.07271575927734375, + "rewards_train/rejected": -4.409398078918457, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -119.03631591796875, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -150.5294952392578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.353631496429443, + "rewards_train/margins": -1.4506819248199463, + "rewards_train/rejected": -2.902949571609497, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -233.1547393798828, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -199.12734985351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.115474224090576, + "rewards_train/margins": -1.5027389526367188, + "rewards_train/rejected": -5.612735271453857, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -6.8017144203186035, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -14.853935241699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17607855796813965, + "rewards_train/margins": 0.530222088098526, + "rewards_train/rejected": -0.35414353013038635, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.659449577331543, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -5.823005676269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21594496071338654, + "rewards_train/margins": -0.14614439010620117, + "rewards_train/rejected": -0.06980057060718536, + "step": 716 + }, + { + "epoch": 0.2, + "logps_train/chosen": -123.53987121582031, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -168.19882202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3039871454238892, + "rewards_train/margins": 2.0158950090408325, + "rewards_train/rejected": -3.3198821544647217, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -31.230344772338867, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -47.09773254394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3269655406475067, + "rewards_train/margins": 2.524238795042038, + "rewards_train/rejected": -2.1972732543945312, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -18.89501953125, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -27.34433937072754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.177001953125, + "rewards_train/margins": 0.7824320197105408, + "rewards_train/rejected": -0.9594339728355408, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -128.5450439453125, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -183.62789916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.754504442214966, + "rewards_train/margins": 2.5082857608795166, + "rewards_train/rejected": -6.262790203094482, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -18.16949462890625, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -27.502666473388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32319948077201843, + "rewards_train/margins": 0.42706719040870667, + "rewards_train/rejected": -0.7502666711807251, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -89.384033203125, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -194.13845825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.888403296470642, + "rewards_train/margins": 9.47544252872467, + "rewards_train/rejected": -11.363845825195312, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -44.063575744628906, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -22.692176818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6813575625419617, + "rewards_train/margins": 0.13161015510559082, + "rewards_train/rejected": -0.8129677176475525, + "step": 717 + }, + { + "epoch": 0.2, + "logps_train/chosen": -132.10740661621094, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -179.1612091064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8607406616210938, + "rewards_train/margins": 0.6553802490234375, + "rewards_train/rejected": -1.5161209106445312, + "step": 717 + }, + { + "epoch": 0.2, + "learning_rate": 1.7078079409272349e-06, + "loss": 0.5648, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.242615699768066, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -19.198326110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25863656401634216, + "rewards_train/margins": 0.5799460709095001, + "rewards_train/rejected": -0.8385826349258423, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -13.194388389587402, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -19.509212493896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6225638389587402, + "rewards_train/margins": 0.5846074819564819, + "rewards_train/rejected": -1.2071713209152222, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.403587341308594, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -13.52955150604248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3403587341308594, + "rewards_train/margins": 0.47822141647338867, + "rewards_train/rejected": -0.818580150604248, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -115.12396240234375, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -227.17852783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.162396192550659, + "rewards_train/margins": 8.055456399917603, + "rewards_train/rejected": -10.217852592468262, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.110929489135742, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -11.50825309753418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6048429608345032, + "rewards_train/margins": 0.24129486083984375, + "rewards_train/rejected": -0.8461378216743469, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -20.690998077392578, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -8.4430570602417, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.45659980177879333, + "rewards_train/margins": -0.16229408979415894, + "rewards_train/rejected": -0.2943057119846344, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -30.604869842529297, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -60.41352462768555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8729869723320007, + "rewards_train/margins": 1.718365490436554, + "rewards_train/rejected": -2.5913524627685547, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.7543497085571289, + "logps_train/ref_chosen": -0.84765625, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -16.32946014404297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00933065451681614, + "rewards_train/margins": -0.0827233325690031, + "rewards_train/rejected": 0.09205398708581924, + "step": 718 + }, + { + "epoch": 0.2, + "logps_train/chosen": -152.94593811035156, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -145.19647216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2945938110351562, + "rewards_train/margins": 1.925053358078003, + "rewards_train/rejected": -3.219647169113159, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -91.21727752685547, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -91.74531555175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021727753803133965, + "rewards_train/margins": 0.9028038252145052, + "rewards_train/rejected": -0.9245315790176392, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -1.5581897497177124, + "logps_train/ref_chosen": -0.98828125, + "logps_train/ref_rejected": -0.75390625, + "logps_train/rejected": -0.6528105735778809, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0569908507168293, + "rewards_train/margins": -0.06710041873157024, + "rewards_train/rejected": 0.010109568014740944, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -24.110349655151367, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -19.644393920898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3235349655151367, + "rewards_train/margins": -0.04659557342529297, + "rewards_train/rejected": -0.27693939208984375, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -4.613858222961426, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -0.75, + "logps_train/rejected": -0.9740779995918274, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07544832676649094, + "rewards_train/margins": -0.0530405268073082, + "rewards_train/rejected": -0.02240779995918274, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -199.06417846679688, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -209.9788818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.706418037414551, + "rewards_train/margins": 0.09147024154663086, + "rewards_train/rejected": -4.797888278961182, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.2231898307800293, + "logps_train/ref_chosen": -1.0390625, + "logps_train/ref_rejected": -1.59375, + "logps_train/rejected": -2.7100353240966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08158726990222931, + "rewards_train/margins": 0.19321580231189728, + "rewards_train/rejected": -0.11162853240966797, + "step": 719 + }, + { + "epoch": 0.2, + "logps_train/chosen": -114.89384460449219, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -162.18319702148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6393845081329346, + "rewards_train/margins": 0.8789353370666504, + "rewards_train/rejected": -3.518319845199585, + "step": 719 + }, + { + "epoch": 0.2, + "learning_rate": 1.705936635397259e-06, + "loss": 0.4915, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -112.70645904541016, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -107.26390075683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4706459045410156, + "rewards_train/margins": 0.35574424266815186, + "rewards_train/rejected": -1.8263901472091675, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -78.21080017089844, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -72.68014526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4210800230503082, + "rewards_train/margins": 0.04693451523780823, + "rewards_train/rejected": -0.46801453828811646, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -21.555015563964844, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -14.197344779968262, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7930015921592712, + "rewards_train/margins": -0.09514212608337402, + "rewards_train/rejected": -0.6978594660758972, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -13.956987380981445, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -4.262385845184326, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.279301255941391, + "rewards_train/margins": 0.1555398404598236, + "rewards_train/rejected": 0.12376141548156738, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -22.271207809448242, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -41.819339752197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7083708047866821, + "rewards_train/margins": 1.0110632181167603, + "rewards_train/rejected": -1.7194340229034424, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -95.46951293945312, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -30.06808090209961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0469512939453125, + "rewards_train/margins": -0.1901431679725647, + "rewards_train/rejected": -0.8568081259727478, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -109.58196258544922, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -161.10012817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.108196258544922, + "rewards_train/margins": 1.7518165111541748, + "rewards_train/rejected": -3.8600127696990967, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -187.70401000976562, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -255.6285858154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5704010725021362, + "rewards_train/margins": 7.692457318305969, + "rewards_train/rejected": -9.262858390808105, + "step": 720 + }, + { + "epoch": 0.2, + "logps_train/chosen": -98.50350952148438, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -96.80415344238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0003509521484375, + "rewards_train/margins": 1.2300643920898438, + "rewards_train/rejected": -2.2304153442382812, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -100.1296615600586, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -148.4022979736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5129661560058594, + "rewards_train/margins": 1.0272636413574219, + "rewards_train/rejected": -1.5402297973632812, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -106.39860534667969, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.91929626464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6398605704307556, + "rewards_train/margins": -0.5479309409856796, + "rewards_train/rejected": -0.09192962944507599, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -98.70771026611328, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -68.98323822021484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5207710266113281, + "rewards_train/margins": -0.42244720458984375, + "rewards_train/rejected": -1.0983238220214844, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.55601978302002, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -12.625462532043457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05560198053717613, + "rewards_train/margins": 0.25069427862763405, + "rewards_train/rejected": -0.3062962591648102, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -255.0507049560547, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -238.81658935546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.205070495605469, + "rewards_train/margins": -0.3234114646911621, + "rewards_train/rejected": -6.881659030914307, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -74.2879409790039, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -154.796630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17120590806007385, + "rewards_train/margins": 6.250869184732437, + "rewards_train/rejected": -6.079663276672363, + "step": 721 + }, + { + "epoch": 0.2, + "logps_train/chosen": -9.412586212158203, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -24.716445922851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0774913802742958, + "rewards_train/margins": 0.88663599640131, + "rewards_train/rejected": -0.8091446161270142, + "step": 721 + }, + { + "epoch": 0.2, + "learning_rate": 1.7040603890592981e-06, + "loss": 0.5068, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -59.797760009765625, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -65.02342224121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6297760009765625, + "rewards_train/margins": -0.12743377685546875, + "rewards_train/rejected": -2.5023422241210938, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.561656475067139, + "logps_train/ref_chosen": -3.015625, + "logps_train/ref_rejected": -1.1640625, + "logps_train/rejected": -1.5771260261535645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25460314750671387, + "rewards_train/margins": -0.2132967934012413, + "rewards_train/rejected": -0.041306354105472565, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -102.76040649414062, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -190.04171752929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8760406374931335, + "rewards_train/margins": 5.828131020069122, + "rewards_train/rejected": -6.704171657562256, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -169.74415588378906, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -179.32041931152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.324415683746338, + "rewards_train/margins": -0.7923736572265625, + "rewards_train/rejected": -3.5320420265197754, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -73.68124389648438, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -111.41415405273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2818756103515625, + "rewards_train/margins": 1.0232910513877869, + "rewards_train/rejected": -0.7414154410362244, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -36.74911117553711, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -17.67898941040039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.199911117553711, + "rewards_train/margins": -0.11638712882995605, + "rewards_train/rejected": -1.0835239887237549, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -194.18264770507812, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -200.0118408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.118264675140381, + "rewards_train/margins": 0.08291959762573242, + "rewards_train/rejected": -6.201184272766113, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -121.60992431640625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -121.35675811767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6609925031661987, + "rewards_train/margins": 2.274683356285095, + "rewards_train/rejected": -3.935675859451294, + "step": 722 + }, + { + "epoch": 0.2, + "logps_train/chosen": -78.57154846191406, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -161.06773376464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14284515380859375, + "rewards_train/margins": 0.7496185302734375, + "rewards_train/rejected": -0.6067733764648438, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.22807502746582, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -16.934593200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16655750572681427, + "rewards_train/margins": 0.24565182626247406, + "rewards_train/rejected": -0.41220933198928833, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -125.51620483398438, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -193.74636840820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.451620578765869, + "rewards_train/margins": 4.723016262054443, + "rewards_train/rejected": -8.174636840820312, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -135.74954223632812, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -231.68423461914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4249541759490967, + "rewards_train/margins": 7.993469476699829, + "rewards_train/rejected": -11.418423652648926, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -201.45082092285156, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -149.81314086914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1450822353363037, + "rewards_train/margins": 2.236232042312622, + "rewards_train/rejected": -5.381314277648926, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.226815700531006, + "logps_train/ref_chosen": -0.99609375, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -25.228416442871094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12307219952344894, + "rewards_train/margins": -0.4752305671572685, + "rewards_train/rejected": 0.3521583676338196, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -27.383649826049805, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -42.238487243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6883649826049805, + "rewards_train/margins": 0.43548381328582764, + "rewards_train/rejected": -1.123848795890808, + "step": 723 + }, + { + "epoch": 0.2, + "logps_train/chosen": -19.959339141845703, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -20.13652801513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8146839141845703, + "rewards_train/margins": 0.07396888732910156, + "rewards_train/rejected": -0.8886528015136719, + "step": 723 + }, + { + "epoch": 0.2, + "learning_rate": 1.7021792150450874e-06, + "loss": 0.4834, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -45.85166549682617, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -135.9560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1101665496826172, + "rewards_train/margins": 3.485438823699951, + "rewards_train/rejected": -4.595605373382568, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -132.9165802001953, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -187.6434326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.841658115386963, + "rewards_train/margins": 2.7726850509643555, + "rewards_train/rejected": -6.614343166351318, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -81.12518310546875, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -84.8752670288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.612518310546875, + "rewards_train/margins": 1.2750084400177002, + "rewards_train/rejected": -1.8875267505645752, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -185.588623046875, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -219.42803955078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.908862590789795, + "rewards_train/margins": -1.2660584449768066, + "rewards_train/rejected": -4.642804145812988, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.3679420948028564, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -0.65234375, + "logps_train/rejected": -0.34743732213974, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.025856709107756615, + "rewards_train/margins": -0.056347351521253586, + "rewards_train/rejected": 0.03049064241349697, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.95989418029785, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -46.09244155883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39151057600975037, + "rewards_train/margins": 3.225754827260971, + "rewards_train/rejected": -2.8342442512512207, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.5161309242248535, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -20.67302894592285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2281755954027176, + "rewards_train/margins": 0.3016273230314255, + "rewards_train/rejected": -0.5298029184341431, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -150.20819091796875, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -180.6661376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.120819091796875, + "rewards_train/margins": 4.395794868469238, + "rewards_train/rejected": -5.516613960266113, + "step": 724 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.3093512058258057, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -0.921875, + "logps_train/rejected": -0.9940798282623291, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.036403872072696686, + "rewards_train/margins": -0.029183389153331518, + "rewards_train/rejected": -0.007220482919365168, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -20.858131408691406, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -16.5662841796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1045631170272827, + "rewards_train/margins": -0.897934690117836, + "rewards_train/rejected": -0.20662842690944672, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.326374053955078, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -76.774169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31388741731643677, + "rewards_train/margins": 1.613529622554779, + "rewards_train/rejected": -1.9274170398712158, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -90.70394134521484, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -123.23140716552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3203941285610199, + "rewards_train/margins": 0.5527466237545013, + "rewards_train/rejected": -0.8731407523155212, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -13.290151596069336, + "logps_train/ref_chosen": -3.015625, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -12.387370109558105, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0274527072906494, + "rewards_train/margins": -0.40121567249298096, + "rewards_train/rejected": -0.6262370347976685, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -12.9580659866333, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -25.974571228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20830659568309784, + "rewards_train/margins": 1.6516505032777786, + "rewards_train/rejected": -1.8599570989608765, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -12.309993743896484, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -11.527244567871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5841243863105774, + "rewards_train/margins": 0.16235005855560303, + "rewards_train/rejected": -0.7464744448661804, + "step": 725 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.9509256482124329, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -1.9921875, + "logps_train/rejected": -1.1984323263168335, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16584494709968567, + "rewards_train/margins": 0.08646942675113678, + "rewards_train/rejected": 0.07937552034854889, + "step": 725 + }, + { + "epoch": 0.2, + "learning_rate": 1.7002931265208506e-06, + "loss": 0.5072, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -19.259258270263672, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -1.265625, + "logps_train/rejected": -15.28747272491455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4865741729736328, + "rewards_train/margins": 1.8887590169906616, + "rewards_train/rejected": -1.4021848440170288, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.423524856567383, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -6.515497207641602, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29860249161720276, + "rewards_train/margins": -0.020490258932113647, + "rewards_train/rejected": -0.2781122326850891, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -89.49008178710938, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -119.45635223388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4990081787109375, + "rewards_train/margins": 0.6466270685195923, + "rewards_train/rejected": -1.1456352472305298, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -13.625505447387695, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -0.984375, + "logps_train/rejected": -13.812993049621582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2641130685806274, + "rewards_train/margins": 0.018748760223388672, + "rewards_train/rejected": -1.2828618288040161, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -15.43741226196289, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -25.61472511291504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.28749123215675354, + "rewards_train/margins": -0.26351872086524963, + "rewards_train/rejected": -0.023972511291503906, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -3.708496332168579, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -22.442378997802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1541503667831421, + "rewards_train/margins": 0.8546382784843445, + "rewards_train/rejected": -0.7004879117012024, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -94.84266662597656, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -229.12896728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4842666387557983, + "rewards_train/margins": 6.828630089759827, + "rewards_train/rejected": -8.312896728515625, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -6.10096549987793, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -8.462730407714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15240345895290375, + "rewards_train/margins": 0.29555150866508484, + "rewards_train/rejected": -0.1431480497121811, + "step": 726 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.9580125212669373, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -3.265625, + "logps_train/rejected": -14.930034637451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15263624489307404, + "rewards_train/margins": 1.3190772086381912, + "rewards_train/rejected": -1.1664409637451172, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -15.324766159057617, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -17.06639289855957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7481016516685486, + "rewards_train/margins": -0.4977123439311981, + "rewards_train/rejected": -0.25038930773735046, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -27.898927688598633, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -9.638334274291992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2273927927017212, + "rewards_train/margins": -0.972934365272522, + "rewards_train/rejected": -0.2544584274291992, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -25.431697845458984, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -15.58273983001709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7181698083877563, + "rewards_train/margins": 0.21510416269302368, + "rewards_train/rejected": -0.93327397108078, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -5.700448989868164, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -1.0703125, + "logps_train/rejected": -12.875151634216309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07058010250329971, + "rewards_train/margins": 1.2510640397667885, + "rewards_train/rejected": -1.1804839372634888, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -15.156900405883789, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -18.297719955444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8219400644302368, + "rewards_train/margins": 0.06408196687698364, + "rewards_train/rejected": -0.8860220313072205, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -128.52996826171875, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -226.63580322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.302996873855591, + "rewards_train/margins": 6.810583829879761, + "rewards_train/rejected": -10.113580703735352, + "step": 727 + }, + { + "epoch": 0.2, + "logps_train/chosen": -133.02935791015625, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -148.6462860107422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.902935743331909, + "rewards_train/margins": -0.08830714225769043, + "rewards_train/rejected": -2.8146286010742188, + "step": 727 + }, + { + "epoch": 0.2, + "learning_rate": 1.6984021366872074e-06, + "loss": 0.5277, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -181.2176971435547, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -139.46401977539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.821769714355469, + "rewards_train/margins": -0.5253677368164062, + "rewards_train/rejected": -4.2964019775390625, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -16.09436798095703, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -15.161934852600098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19056320190429688, + "rewards_train/margins": 1.1317567229270935, + "rewards_train/rejected": -0.9411935210227966, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -33.726539611816406, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -64.07371520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2226539850234985, + "rewards_train/margins": 0.38471758365631104, + "rewards_train/rejected": -1.6073715686798096, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -0.7244603037834167, + "logps_train/ref_chosen": -1.453125, + "logps_train/ref_rejected": -0.328125, + "logps_train/rejected": -0.9538402557373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07286646962165833, + "rewards_train/margins": 0.1354379951953888, + "rewards_train/rejected": -0.06257152557373047, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -11.807577133178711, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -41.39822769165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2932577133178711, + "rewards_train/margins": 1.63406503200531, + "rewards_train/rejected": -1.9273227453231812, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -36.54458999633789, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -19.07906723022461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.641959011554718, + "rewards_train/margins": -0.30280229449272156, + "rewards_train/rejected": -0.33915671706199646, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -37.85515594482422, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -30.177738189697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3105156123638153, + "rewards_train/margins": 0.8447581827640533, + "rewards_train/rejected": -1.1552737951278687, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -9.416662216186523, + "logps_train/ref_chosen": -3.015625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -18.949981689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6401037573814392, + "rewards_train/margins": 0.2236444354057312, + "rewards_train/rejected": -0.8637481927871704, + "step": 728 + }, + { + "epoch": 0.2, + "logps_train/chosen": -69.90499877929688, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -103.47272491455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1154998540878296, + "rewards_train/margins": 1.4317725896835327, + "rewards_train/rejected": -2.5472724437713623, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.341889381408691, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -26.109737396240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4529389441013336, + "rewards_train/margins": 0.9330348670482635, + "rewards_train/rejected": -1.3859738111495972, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -181.06752014160156, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -184.5074462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.606751918792725, + "rewards_train/margins": 0.44399261474609375, + "rewards_train/rejected": -5.050744533538818, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -113.2888412475586, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -215.7942657470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0788841247558594, + "rewards_train/margins": 5.6005425453186035, + "rewards_train/rejected": -6.679426670074463, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -87.2639389038086, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -107.50955963134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.076393961906433, + "rewards_train/margins": 0.8745620250701904, + "rewards_train/rejected": -1.9509559869766235, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -57.500614166259766, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -96.94151306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4000614285469055, + "rewards_train/margins": 2.6440898776054382, + "rewards_train/rejected": -3.0441513061523438, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -158.73553466796875, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -189.897705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8735535144805908, + "rewards_train/margins": 1.4162170886993408, + "rewards_train/rejected": -3.2897706031799316, + "step": 729 + }, + { + "epoch": 0.2, + "logps_train/chosen": -11.550804138183594, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -10.266139030456543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.055080413818359375, + "rewards_train/margins": 0.39340850710868835, + "rewards_train/rejected": -0.44848892092704773, + "step": 729 + }, + { + "epoch": 0.2, + "learning_rate": 1.696506258779082e-06, + "loss": 0.412, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -12.932530403137207, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -30.29785919189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3495030403137207, + "rewards_train/margins": 0.9677828550338745, + "rewards_train/rejected": -1.3172858953475952, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -107.0939712524414, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -231.33792114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.709397077560425, + "rewards_train/margins": 7.924394845962524, + "rewards_train/rejected": -10.63379192352295, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -58.150081634521484, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -201.45379638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9650081992149353, + "rewards_train/margins": 7.780371248722076, + "rewards_train/rejected": -8.745379447937012, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -146.74295043945312, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -191.7470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.8242950439453125, + "rewards_train/margins": 2.2004122734069824, + "rewards_train/rejected": -7.024707317352295, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -14.02892780303955, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -10.324451446533203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5466427803039551, + "rewards_train/margins": -0.4391976371407509, + "rewards_train/rejected": -0.1074451431632042, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -151.6017303466797, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -190.58847045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.010173320770264, + "rewards_train/margins": 1.6486740112304688, + "rewards_train/rejected": -7.658847332000732, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -104.21334838867188, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -114.928466796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.471334844827652, + "rewards_train/margins": -0.8284881711006165, + "rewards_train/rejected": 0.3571533262729645, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -69.53976440429688, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -116.85721588134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6789764165878296, + "rewards_train/margins": 3.1067453622817993, + "rewards_train/rejected": -4.785721778869629, + "step": 730 + }, + { + "epoch": 0.2, + "logps_train/chosen": -135.31521606445312, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -186.72451782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.381521701812744, + "rewards_train/margins": 0.89093017578125, + "rewards_train/rejected": -4.272451877593994, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -2.613591432571411, + "logps_train/ref_chosen": -0.9140625, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -12.47432804107666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1699528992176056, + "rewards_train/margins": 0.13997989892959595, + "rewards_train/rejected": -0.30993279814720154, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -6.243502616882324, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -3.60115385055542, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0006497383001260459, + "rewards_train/margins": 0.14514012931613252, + "rewards_train/rejected": -0.14449039101600647, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -6.099765300750732, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -40.335166931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04747653007507324, + "rewards_train/margins": 1.323540210723877, + "rewards_train/rejected": -1.3710167407989502, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -104.43231201171875, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -174.27137756347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19323121011257172, + "rewards_train/margins": 3.733906641602516, + "rewards_train/rejected": -3.927137851715088, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -25.159469604492188, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -28.83631706237793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6409469842910767, + "rewards_train/margins": 0.31768471002578735, + "rewards_train/rejected": -0.958631694316864, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -139.00396728515625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -173.54312133789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4003968238830566, + "rewards_train/margins": 0.8539152145385742, + "rewards_train/rejected": -4.254312038421631, + "step": 731 + }, + { + "epoch": 0.2, + "logps_train/chosen": -8.415154457092285, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -11.306968688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12901544570922852, + "rewards_train/margins": 0.4829314351081848, + "rewards_train/rejected": -0.6119468808174133, + "step": 731 + }, + { + "epoch": 0.2, + "learning_rate": 1.6946055060656098e-06, + "loss": 0.3755, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -24.30278778076172, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -32.93429946899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9177788496017456, + "rewards_train/margins": 0.27565109729766846, + "rewards_train/rejected": -2.193429946899414, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -191.87051391601562, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -166.11407470703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.387051582336426, + "rewards_train/margins": -1.1756441593170166, + "rewards_train/rejected": -3.211407423019409, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -152.35699462890625, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -159.02560424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.085699558258057, + "rewards_train/margins": 3.5168609619140625, + "rewards_train/rejected": -7.602560520172119, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -95.54598999023438, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -80.73515319824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3545989990234375, + "rewards_train/margins": 0.5939162969589233, + "rewards_train/rejected": -1.9485152959823608, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -207.68971252441406, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -197.70855712890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.2689714431762695, + "rewards_train/margins": -0.5981154441833496, + "rewards_train/rejected": -5.67085599899292, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -10.624225616455078, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -0.98046875, + "logps_train/rejected": -0.6121842861175537, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29992255568504333, + "rewards_train/margins": -0.336751002818346, + "rewards_train/rejected": 0.03682844713330269, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -25.991844177246094, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -17.359895706176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0008155822870321572, + "rewards_train/margins": 1.0211802244302817, + "rewards_train/rejected": -1.0203646421432495, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -1.8195061683654785, + "logps_train/ref_chosen": -1.4375, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -6.0503973960876465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03820061683654785, + "rewards_train/margins": 0.2996516227722168, + "rewards_train/rejected": -0.33785223960876465, + "step": 732 + }, + { + "epoch": 0.2, + "logps_train/chosen": -22.46782684326172, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -23.15526008605957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22178268432617188, + "rewards_train/margins": 0.7062433362007141, + "rewards_train/rejected": -0.928026020526886, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -6.754950523376465, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -18.035675048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07237005233764648, + "rewards_train/margins": 0.35619744658470154, + "rewards_train/rejected": -0.428567498922348, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -75.53800964355469, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -74.03009796142578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3538010120391846, + "rewards_train/margins": -0.25079119205474854, + "rewards_train/rejected": -1.103009819984436, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -44.52669143676758, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -81.86422729492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5776691436767578, + "rewards_train/margins": 2.4587535858154297, + "rewards_train/rejected": -4.0364227294921875, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -173.63873291015625, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -217.62399291992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.763873338699341, + "rewards_train/margins": 2.3985259532928467, + "rewards_train/rejected": -6.1623992919921875, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -141.8894805908203, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -169.9140167236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6389482021331787, + "rewards_train/margins": 2.8024537563323975, + "rewards_train/rejected": -6.441401958465576, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -12.336919784545898, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -17.59308433532715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5555669665336609, + "rewards_train/margins": 0.13499146699905396, + "rewards_train/rejected": -0.6905584335327148, + "step": 733 + }, + { + "epoch": 0.2, + "logps_train/chosen": -73.45065307617188, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -132.57284545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5950653553009033, + "rewards_train/margins": 0.9122192859649658, + "rewards_train/rejected": -2.507284641265869, + "step": 733 + }, + { + "epoch": 0.21, + "learning_rate": 1.6926998918500442e-06, + "loss": 0.5127, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -29.19588851928711, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -34.53472900390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1320888996124268, + "rewards_train/margins": -0.8036159873008728, + "rewards_train/rejected": -0.32847291231155396, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -108.83584594726562, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -191.01451110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2835845947265625, + "rewards_train/margins": 7.217866897583008, + "rewards_train/rejected": -8.50145149230957, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -26.570724487304688, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -40.18482971191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1070724725723267, + "rewards_train/margins": -0.18858951330184937, + "rewards_train/rejected": -0.9184829592704773, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -6.970129013061523, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -8.123753547668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17513790726661682, + "rewards_train/margins": 0.13723745942115784, + "rewards_train/rejected": -0.31237536668777466, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -42.051456451416016, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -42.08574295043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30514565110206604, + "rewards_train/margins": 0.6034286320209503, + "rewards_train/rejected": -0.9085742831230164, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -112.14469146728516, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -200.44068908691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3144691586494446, + "rewards_train/margins": 6.429599940776825, + "rewards_train/rejected": -6.7440690994262695, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.583495616912842, + "logps_train/ref_chosen": -0.8125, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -16.21001625061035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47709956765174866, + "rewards_train/margins": 0.7001521289348602, + "rewards_train/rejected": -1.1772516965866089, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.21612048149108887, + "logps_train/ref_chosen": -0.267578125, + "logps_train/ref_rejected": -0.267578125, + "logps_train/rejected": -0.21476420760154724, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.005145764444023371, + "rewards_train/margins": -0.00013562757521867752, + "rewards_train/rejected": 0.005281392019242048, + "step": 734 + }, + { + "epoch": 0.21, + "logps_train/chosen": -165.64686584472656, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -199.07583618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4353134334087372, + "rewards_train/margins": 2.742897003889084, + "rewards_train/rejected": -2.3075835704803467, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -13.50473403930664, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -14.236248016357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7723484039306641, + "rewards_train/margins": 0.10127639770507812, + "rewards_train/rejected": -0.8736248016357422, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -261.7918395996094, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -211.50888061523438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.879183769226074, + "rewards_train/margins": -3.5282955169677734, + "rewards_train/rejected": -5.350888252258301, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.256119251251221, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -2.796875, + "logps_train/rejected": -2.82671856880188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3295181691646576, + "rewards_train/margins": -0.32653381233103573, + "rewards_train/rejected": -0.0029843568336218596, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -120.95379638671875, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -26.410778045654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05462036281824112, + "rewards_train/margins": 1.826948143541813, + "rewards_train/rejected": -1.7723277807235718, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.935359001159668, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -30.468679428100586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8450984358787537, + "rewards_train/margins": -0.17323046922683716, + "rewards_train/rejected": -0.6718679666519165, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -102.75395202636719, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -213.14210510253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2753952741622925, + "rewards_train/margins": 4.838815331459045, + "rewards_train/rejected": -6.114210605621338, + "step": 735 + }, + { + "epoch": 0.21, + "logps_train/chosen": -132.35031127929688, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -188.38943481445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0350310802459717, + "rewards_train/margins": 3.603912591934204, + "rewards_train/rejected": -5.638943672180176, + "step": 735 + }, + { + "epoch": 0.21, + "learning_rate": 1.690789429469664e-06, + "loss": 0.639, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -144.6183319091797, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -138.10122680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8618332147598267, + "rewards_train/margins": 1.29828941822052, + "rewards_train/rejected": -3.1601226329803467, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -2.4009976387023926, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -11.745567321777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025525236502289772, + "rewards_train/margins": 0.3125819806009531, + "rewards_train/rejected": -0.28705674409866333, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -9.15068244934082, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -17.99772071838379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.536943256855011, + "rewards_train/margins": 0.6972038149833679, + "rewards_train/rejected": -1.234147071838379, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -97.07911682128906, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -181.05386352539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7579116821289062, + "rewards_train/margins": 4.7974748611450195, + "rewards_train/rejected": -5.555386543273926, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -57.622352600097656, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -120.01657104492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8372352719306946, + "rewards_train/margins": 1.4144219756126404, + "rewards_train/rejected": -2.251657247543335, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -89.03691101074219, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -98.71656799316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3036911189556122, + "rewards_train/margins": 1.5679656565189362, + "rewards_train/rejected": -1.8716567754745483, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -87.0777359008789, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -147.2481689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8077735900878906, + "rewards_train/margins": 1.5670433044433594, + "rewards_train/rejected": -2.37481689453125, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -118.65013122558594, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -164.88052368164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03498687967658043, + "rewards_train/margins": 3.2230392955243587, + "rewards_train/rejected": -3.1880524158477783, + "step": 736 + }, + { + "epoch": 0.21, + "logps_train/chosen": -143.91114807128906, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -112.89129638671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8911149501800537, + "rewards_train/margins": -1.1019853353500366, + "rewards_train/rejected": -1.789129614830017, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -179.31333923339844, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -174.0225372314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.831334114074707, + "rewards_train/margins": 0.5209197998046875, + "rewards_train/rejected": -5.3522539138793945, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -1.3106290102005005, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -8.36587142944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1298746019601822, + "rewards_train/margins": 0.5414617508649826, + "rewards_train/rejected": -0.4115871489048004, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.2996602058410645, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -1.890625, + "logps_train/rejected": -16.370607376098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1565285176038742, + "rewards_train/margins": 1.291469767689705, + "rewards_train/rejected": -1.447998285293579, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -84.11068725585938, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -133.14157104492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.461068719625473, + "rewards_train/margins": 2.403088480234146, + "rewards_train/rejected": -2.864157199859619, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -211.52679443359375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -174.71319580078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.35267972946167, + "rewards_train/margins": -0.13136005401611328, + "rewards_train/rejected": -7.221319675445557, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -181.4122772216797, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -130.45608520507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.941227912902832, + "rewards_train/margins": -3.1956193447113037, + "rewards_train/rejected": -2.7456085681915283, + "step": 737 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.04539680480957, + "logps_train/ref_chosen": -2.328125, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -8.766641616821289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27172717452049255, + "rewards_train/margins": 0.22681200504302979, + "rewards_train/rejected": -0.49853917956352234, + "step": 737 + }, + { + "epoch": 0.21, + "learning_rate": 1.6888741322956812e-06, + "loss": 0.5665, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.889917850494385, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -9.285944938659668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11399178951978683, + "rewards_train/margins": 0.05835270136594772, + "rewards_train/rejected": -0.17234449088573456, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -55.914981842041016, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -108.42127227783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8414981961250305, + "rewards_train/margins": 3.9256293177604675, + "rewards_train/rejected": -4.767127513885498, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -3.568380355834961, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -1.4942328929901123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.053713034838438034, + "rewards_train/margins": -0.3824147395789623, + "rewards_train/rejected": 0.3287017047405243, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -319.08349609375, + "logps_train/ref_chosen": -320.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -159.1887664794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09165038913488388, + "rewards_train/margins": 4.910527132451534, + "rewards_train/rejected": -4.81887674331665, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.501790523529053, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -9.142356872558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20642904937267303, + "rewards_train/margins": 0.1734316498041153, + "rewards_train/rejected": -0.37986069917678833, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -45.251983642578125, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -118.96189880371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30019837617874146, + "rewards_train/margins": 3.9959914088249207, + "rewards_train/rejected": -4.296189785003662, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -40.87141418457031, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -45.113731384277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1371414214372635, + "rewards_train/margins": 2.6617317646741867, + "rewards_train/rejected": -2.79887318611145, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -160.2145538330078, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -147.66326904296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.02145528793335, + "rewards_train/margins": -0.7051284313201904, + "rewards_train/rejected": -3.316326856613159, + "step": 738 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.182624816894531, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -1.7890625, + "logps_train/rejected": -3.3291754722595215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23076248168945312, + "rewards_train/margins": -0.07675118744373322, + "rewards_train/rejected": -0.1540112942457199, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -40.81953430175781, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -31.31982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7069534659385681, + "rewards_train/margins": 0.8625289797782898, + "rewards_train/rejected": -1.569482445716858, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -17.080957412719727, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -43.776878356933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28309574723243713, + "rewards_train/margins": 0.8945921361446381, + "rewards_train/rejected": -1.1776878833770752, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -31.892147064208984, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -17.48816680908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18578529357910156, + "rewards_train/margins": 0.7158519625663757, + "rewards_train/rejected": -0.5300666689872742, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -14.000929832458496, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -58.29289245605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33134299516677856, + "rewards_train/margins": 1.372946321964264, + "rewards_train/rejected": -1.7042893171310425, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -109.81578063964844, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -182.73495483398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8315781354904175, + "rewards_train/margins": 2.64191734790802, + "rewards_train/rejected": -4.4734954833984375, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -18.62310791015625, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -37.51958084106445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24981079995632172, + "rewards_train/margins": 1.5396473556756973, + "rewards_train/rejected": -1.789458155632019, + "step": 739 + }, + { + "epoch": 0.21, + "logps_train/chosen": -183.22079467773438, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -151.97732543945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.222079753875732, + "rewards_train/margins": -1.4743471145629883, + "rewards_train/rejected": -3.747732639312744, + "step": 739 + }, + { + "epoch": 0.21, + "learning_rate": 1.6869540137331443e-06, + "loss": 0.462, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -132.26177978515625, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -147.08628845214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.376178026199341, + "rewards_train/margins": 3.9824507236480713, + "rewards_train/rejected": -6.358628749847412, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -82.74465942382812, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -168.83294677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3744659423828125, + "rewards_train/margins": 6.258828639984131, + "rewards_train/rejected": -7.633294582366943, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -93.36793518066406, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -31.668994903564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01320648193359375, + "rewards_train/margins": 1.6551059484481812, + "rewards_train/rejected": -1.6418994665145874, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -10.950798988342285, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -12.426458358764648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48257991671562195, + "rewards_train/margins": 0.1444409191608429, + "rewards_train/rejected": -0.6270208358764648, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -145.546630859375, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -78.88490295410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7546632289886475, + "rewards_train/margins": -2.066172957420349, + "rewards_train/rejected": -1.6884902715682983, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -37.97053909301758, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -68.58799743652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8345539569854736, + "rewards_train/margins": 0.19924592971801758, + "rewards_train/rejected": -2.033799886703491, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -54.309391021728516, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -114.61978912353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21906089782714844, + "rewards_train/margins": 1.131039798259735, + "rewards_train/rejected": -0.9119789004325867, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -21.875307083129883, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -34.39883041381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6125307083129883, + "rewards_train/margins": 0.6398524045944214, + "rewards_train/rejected": -1.2523831129074097, + "step": 740 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.871726989746094, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -7.078100204467773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6003273129463196, + "rewards_train/margins": 1.012824833393097, + "rewards_train/rejected": -0.41249752044677734, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -41.16197967529297, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -25.4361572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8161979913711548, + "rewards_train/margins": 0.27741777896881104, + "rewards_train/rejected": -1.0936157703399658, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -20.704769134521484, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -52.80104064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0954769179224968, + "rewards_train/margins": 0.4596271589398384, + "rewards_train/rejected": -0.5551040768623352, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -108.16661834716797, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -107.95111846923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7666618824005127, + "rewards_train/margins": 0.32844996452331543, + "rewards_train/rejected": -3.095111846923828, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -2.4806506633758545, + "logps_train/ref_chosen": -1.0625, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -22.347963333129883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14181506633758545, + "rewards_train/margins": 0.14298126101493835, + "rewards_train/rejected": -0.2847963273525238, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -34.14993667602539, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -94.34417724609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.239993691444397, + "rewards_train/margins": -0.255575954914093, + "rewards_train/rejected": -0.984417736530304, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -41.189823150634766, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -53.503196716308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1189823150634766, + "rewards_train/margins": -0.09366261959075928, + "rewards_train/rejected": -1.0253196954727173, + "step": 741 + }, + { + "epoch": 0.21, + "logps_train/chosen": -129.61553955078125, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -168.93771362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.261554002761841, + "rewards_train/margins": 0.8322174549102783, + "rewards_train/rejected": -3.093771457672119, + "step": 741 + }, + { + "epoch": 0.21, + "learning_rate": 1.6850290872208483e-06, + "loss": 0.5484, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -134.66033935546875, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -138.00732421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8660340309143066, + "rewards_train/margins": -0.9153015613555908, + "rewards_train/rejected": -1.9507324695587158, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -20.303909301757812, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -31.455944061279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.192890927195549, + "rewards_train/margins": 1.0277034789323807, + "rewards_train/rejected": -1.2205944061279297, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -136.03396606445312, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -62.5159797668457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6533966064453125, + "rewards_train/margins": -1.4267985820770264, + "rewards_train/rejected": -1.2265980243682861, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -79.8194580078125, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -132.92791748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.856945753097534, + "rewards_train/margins": 0.18584609031677246, + "rewards_train/rejected": -3.0427918434143066, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -79.74591827392578, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -234.55870056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5745918154716492, + "rewards_train/margins": 10.431278049945831, + "rewards_train/rejected": -11.00586986541748, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -7.299953460693359, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -1.421875, + "logps_train/rejected": -3.2853665351867676, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20125465095043182, + "rewards_train/margins": 0.3876038044691086, + "rewards_train/rejected": -0.18634915351867676, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -65.50308227539062, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -104.31355285644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.349691778421402, + "rewards_train/margins": 0.3310470636934042, + "rewards_train/rejected": 0.01864471472799778, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -12.95751667022705, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -27.6881160736084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5176267027854919, + "rewards_train/margins": 0.8636849522590637, + "rewards_train/rejected": -1.3813116550445557, + "step": 742 + }, + { + "epoch": 0.21, + "logps_train/chosen": -84.1924819946289, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -168.0040283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7692482471466064, + "rewards_train/margins": 4.231154680252075, + "rewards_train/rejected": -6.000402927398682, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -116.091796875, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -207.24082946777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.759179711341858, + "rewards_train/margins": 4.064903140068054, + "rewards_train/rejected": -5.824082851409912, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -10.122923851013184, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -19.229154586791992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02479238621890545, + "rewards_train/margins": 0.11062307842075825, + "rewards_train/rejected": -0.1354154646396637, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -85.33981323242188, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -85.00474548339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6839813590049744, + "rewards_train/margins": -0.033506810665130615, + "rewards_train/rejected": -0.6504745483398438, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.413040161132812, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -12.496788024902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2538040280342102, + "rewards_train/margins": 0.06462478637695312, + "rewards_train/rejected": -0.31842881441116333, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -116.37666320800781, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -77.44854736328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2876663208007812, + "rewards_train/margins": -0.14281153678894043, + "rewards_train/rejected": -1.1448547840118408, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -27.087512969970703, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -41.63671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49625131487846375, + "rewards_train/margins": 0.3174205720424652, + "rewards_train/rejected": -0.813671886920929, + "step": 743 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.271055698394775, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -2.28125, + "logps_train/rejected": -6.492374897003174, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17554306983947754, + "rewards_train/margins": 0.24556943774223328, + "rewards_train/rejected": -0.4211125075817108, + "step": 743 + }, + { + "epoch": 0.21, + "learning_rate": 1.6830993662312375e-06, + "loss": 0.5719, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -90.6059341430664, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -107.78121948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6105934381484985, + "rewards_train/margins": 2.867528796195984, + "rewards_train/rejected": -4.478122234344482, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -22.284709930419922, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -12.315143585205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30347099900245667, + "rewards_train/margins": 0.318668395280838, + "rewards_train/rejected": -0.6221393942832947, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -25.496051788330078, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -11.714567184448242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2246052026748657, + "rewards_train/margins": -0.5625234842300415, + "rewards_train/rejected": -0.6620817184448242, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -9.332747459411621, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -78.80619812011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20202474296092987, + "rewards_train/margins": 1.4785950928926468, + "rewards_train/rejected": -1.6806198358535767, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -130.20843505859375, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -147.130126953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2208435535430908, + "rewards_train/margins": -1.3078308627009392, + "rewards_train/rejected": 0.08698730915784836, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -24.365135192871094, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -26.533905029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8365135192871094, + "rewards_train/margins": 0.06687700748443604, + "rewards_train/rejected": -0.9033905267715454, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -153.79910278320312, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -35.55587387084961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.67991042137146, + "rewards_train/margins": -1.074323058128357, + "rewards_train/rejected": -1.605587363243103, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -80.48583221435547, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -179.02560424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49858322739601135, + "rewards_train/margins": 6.953977197408676, + "rewards_train/rejected": -7.4525604248046875, + "step": 744 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.00921656470745802, + "logps_train/ref_chosen": -0.0654296875, + "logps_train/ref_rejected": -0.0654296875, + "logps_train/rejected": -0.008893572725355625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.005621312186121941, + "rewards_train/margins": -3.229966387152672e-05, + "rewards_train/rejected": 0.005653611849993467, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -115.88459777832031, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -140.73654174804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.088459849357605, + "rewards_train/margins": 1.6351944208145142, + "rewards_train/rejected": -2.723654270172119, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -19.0787410736084, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -28.0859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3266241252422333, + "rewards_train/margins": 1.4007196724414825, + "rewards_train/rejected": -1.7273437976837158, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -3.7177863121032715, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -9.927289962768555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01728386990725994, + "rewards_train/margins": -0.15248713083565235, + "rewards_train/rejected": 0.1697710007429123, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -171.57122802734375, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -173.75152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.657122850418091, + "rewards_train/margins": 0.91802978515625, + "rewards_train/rejected": -3.575152635574341, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.528857231140137, + "logps_train/ref_chosen": -3.921875, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -15.54637622833252, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7606982588768005, + "rewards_train/margins": 0.17518937587738037, + "rewards_train/rejected": -0.9358876347541809, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -3.7706990242004395, + "logps_train/ref_chosen": -1.609375, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -10.801684379577637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21613240242004395, + "rewards_train/margins": 0.3140360713005066, + "rewards_train/rejected": -0.5301684737205505, + "step": 745 + }, + { + "epoch": 0.21, + "logps_train/chosen": -10.7656831741333, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -15.803871154785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4046933352947235, + "rewards_train/margins": 0.17569378018379211, + "rewards_train/rejected": -0.5803871154785156, + "step": 745 + }, + { + "epoch": 0.21, + "learning_rate": 1.6811648642703133e-06, + "loss": 0.5856, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -6.392535209655762, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -18.6900634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07675351947546005, + "rewards_train/margins": 0.1422528252005577, + "rewards_train/rejected": -0.21900634467601776, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -39.31653594970703, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -37.40699005126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7066535949707031, + "rewards_train/margins": -0.16595458984375, + "rewards_train/rejected": -0.5406990051269531, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -118.94585418701172, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -168.79019165039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.144585371017456, + "rewards_train/margins": 5.784434080123901, + "rewards_train/rejected": -7.929019451141357, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -105.91187286376953, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -165.83131408691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7088127136230469, + "rewards_train/margins": 3.7919442653656006, + "rewards_train/rejected": -3.0831315517425537, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -99.53768157958984, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -212.144287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2037681341171265, + "rewards_train/margins": 8.110661149024963, + "rewards_train/rejected": -9.31442928314209, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -219.251708984375, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -212.6270751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.025170803070068, + "rewards_train/margins": 3.0875372886657715, + "rewards_train/rejected": -10.11270809173584, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -111.5947265625, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -197.05731201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.559472680091858, + "rewards_train/margins": 4.246258616447449, + "rewards_train/rejected": -5.805731296539307, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -3.251849412918091, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -15.765563011169434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08106505870819092, + "rewards_train/margins": 1.4029338359832764, + "rewards_train/rejected": -1.3218687772750854, + "step": 746 + }, + { + "epoch": 0.21, + "logps_train/chosen": -52.63963317871094, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -84.2676773071289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1639633178710938, + "rewards_train/margins": 1.162804365158081, + "rewards_train/rejected": -2.326767683029175, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -6.970830917358398, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -6.799532890319824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26895809173583984, + "rewards_train/margins": 0.1469326913356781, + "rewards_train/rejected": -0.41589078307151794, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -115.07229614257812, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -101.01718139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7572296261787415, + "rewards_train/margins": 0.8444885611534119, + "rewards_train/rejected": -1.6017181873321533, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -169.97677612304688, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -195.5411376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3976776599884033, + "rewards_train/margins": 0.8564362525939941, + "rewards_train/rejected": -3.2541139125823975, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.44281262159347534, + "logps_train/ref_chosen": -0.70703125, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -9.326118469238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026421863585710526, + "rewards_train/margins": 0.6355962343513966, + "rewards_train/rejected": -0.609174370765686, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -18.82689094543457, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -39.743736267089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44518908858299255, + "rewards_train/margins": 1.1791845858097076, + "rewards_train/rejected": -1.6243736743927002, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -76.12557983398438, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -75.7720947265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.31255799531936646, + "rewards_train/margins": -0.03534850478172302, + "rewards_train/rejected": -0.27720949053764343, + "step": 747 + }, + { + "epoch": 0.21, + "logps_train/chosen": -28.06426239013672, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -19.99085807800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8689262270927429, + "rewards_train/margins": 0.66765958070755, + "rewards_train/rejected": -1.536585807800293, + "step": 747 + }, + { + "epoch": 0.21, + "learning_rate": 1.6792255948775378e-06, + "loss": 0.3208, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -110.58208465576172, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -143.51451110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.058208465576171875, + "rewards_train/margins": 2.343242645263672, + "rewards_train/rejected": -2.4014511108398438, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -100.28117370605469, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -150.02032470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4281173944473267, + "rewards_train/margins": 0.77391517162323, + "rewards_train/rejected": -2.2020325660705566, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -150.3146209716797, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -139.90992736816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.731462240219116, + "rewards_train/margins": -0.6404695510864258, + "rewards_train/rejected": -3.0909926891326904, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -43.48426818847656, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -154.4793701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47342681884765625, + "rewards_train/margins": 2.274510145187378, + "rewards_train/rejected": -2.747936964035034, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.9775141477584839, + "logps_train/ref_chosen": -1.21875, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -8.791799545288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02412358485162258, + "rewards_train/margins": 0.053303539752960205, + "rewards_train/rejected": -0.029179954901337624, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -174.14089965820312, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -265.1595458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.014090061187744, + "rewards_train/margins": 6.201864719390869, + "rewards_train/rejected": -9.215954780578613, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -7.360074996948242, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -20.395723342895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5711637735366821, + "rewards_train/margins": 0.24340856075286865, + "rewards_train/rejected": -0.8145723342895508, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -99.6556396484375, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -87.08513641357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16556397080421448, + "rewards_train/margins": 1.9429496228694916, + "rewards_train/rejected": -2.108513593673706, + "step": 748 + }, + { + "epoch": 0.21, + "logps_train/chosen": -85.55006408691406, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -84.13789367675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.205006405711174, + "rewards_train/margins": 0.8087830096483231, + "rewards_train/rejected": -1.013789415359497, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -14.139227867126465, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -15.592540740966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5451728105545044, + "rewards_train/margins": -0.10466873645782471, + "rewards_train/rejected": -0.4405040740966797, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -59.1229248046875, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -78.6030502319336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13770751655101776, + "rewards_train/margins": 0.14801254030317068, + "rewards_train/rejected": -0.01030502375215292, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -217.22958374023438, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -239.59722900390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.022958278656006, + "rewards_train/margins": -1.4632353782653809, + "rewards_train/rejected": -5.559722900390625, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -100.07147216796875, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -94.00755310058594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.857147216796875, + "rewards_train/margins": -0.25639188289642334, + "rewards_train/rejected": -0.6007553339004517, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -68.95130157470703, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -155.5845947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2951301634311676, + "rewards_train/margins": 5.263329595327377, + "rewards_train/rejected": -5.558459758758545, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -7.845117568969727, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -4.940469264984131, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12513676285743713, + "rewards_train/margins": 0.01891016960144043, + "rewards_train/rejected": -0.14404693245887756, + "step": 749 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.3119763731956482, + "logps_train/ref_chosen": -0.25, + "logps_train/ref_rejected": -0.25, + "logps_train/rejected": -0.30550432205200195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006197637412697077, + "rewards_train/margins": -0.0006472049281001091, + "rewards_train/rejected": -0.005550432484596968, + "step": 749 + }, + { + "epoch": 0.21, + "learning_rate": 1.6772815716257411e-06, + "loss": 0.5396, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -265.79840087890625, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -292.5804748535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.179840087890625, + "rewards_train/margins": 1.778207778930664, + "rewards_train/rejected": -10.958047866821289, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -118.5737533569336, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -113.84756469726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3573753833770752, + "rewards_train/margins": 0.17738115787506104, + "rewards_train/rejected": -1.5347565412521362, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -107.31448364257812, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -83.25738525390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.431448370218277, + "rewards_train/margins": -0.5057098492980003, + "rewards_train/rejected": 0.07426147907972336, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -84.09967803955078, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -64.99783325195312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2599678039550781, + "rewards_train/margins": -0.1351844072341919, + "rewards_train/rejected": -1.1247833967208862, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.8132086396217346, + "logps_train/ref_chosen": -1.2421875, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -32.49845886230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04289788752794266, + "rewards_train/margins": 0.7552437856793404, + "rewards_train/rejected": -0.7123458981513977, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -15.074262619018555, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -25.05877685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05117626115679741, + "rewards_train/margins": 1.5359514243900776, + "rewards_train/rejected": -1.587127685546875, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -17.00620460510254, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -8.585881233215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07562046498060226, + "rewards_train/margins": 0.014217659831047058, + "rewards_train/rejected": -0.08983812481164932, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -10.012776374816895, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -28.097864151000977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10752763599157333, + "rewards_train/margins": 0.1897587850689888, + "rewards_train/rejected": -0.29728642106056213, + "step": 750 + }, + { + "epoch": 0.21, + "logps_train/chosen": -86.02645874023438, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -170.72589111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6026458740234375, + "rewards_train/margins": 1.9699432849884033, + "rewards_train/rejected": -3.572589159011841, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -88.4676513671875, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -171.2342529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.796765148639679, + "rewards_train/margins": 5.476660430431366, + "rewards_train/rejected": -6.273425579071045, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -144.49063110351562, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -171.50146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.299063205718994, + "rewards_train/margins": 3.451083183288574, + "rewards_train/rejected": -5.750146389007568, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -106.60188293457031, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -93.91427612304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.460188388824463, + "rewards_train/margins": -0.46876072883605957, + "rewards_train/rejected": -1.9914276599884033, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.018291473388672, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -25.422212600708008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06432914733886719, + "rewards_train/margins": 0.7153921127319336, + "rewards_train/rejected": -0.7797212600708008, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -19.7333984375, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -52.79629135131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.82958984375, + "rewards_train/margins": 0.9750393629074097, + "rewards_train/rejected": -1.8046292066574097, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -16.14106559753418, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -11.752551078796387, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3516065776348114, + "rewards_train/margins": 0.4642735421657562, + "rewards_train/rejected": -0.8158801198005676, + "step": 751 + }, + { + "epoch": 0.21, + "logps_train/chosen": -52.68122863769531, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -10.191691398620605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006877136416733265, + "rewards_train/margins": 0.5635463120415807, + "rewards_train/rejected": -0.5566691756248474, + "step": 751 + }, + { + "epoch": 0.21, + "learning_rate": 1.6753328081210244e-06, + "loss": 0.4474, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -32.989288330078125, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -43.579811096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6364288330078125, + "rewards_train/margins": 1.9840524196624756, + "rewards_train/rejected": -2.620481252670288, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -12.973820686340332, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -21.657007217407227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7473821043968201, + "rewards_train/margins": 0.4620686173439026, + "rewards_train/rejected": -1.2094507217407227, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -104.12651062011719, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -92.08644104003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6626510620117188, + "rewards_train/margins": -0.3040069341659546, + "rewards_train/rejected": -1.3586441278457642, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -45.3168830871582, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -54.22188949584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48168832063674927, + "rewards_train/margins": 0.9655006527900696, + "rewards_train/rejected": -1.4471889734268188, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -96.32075500488281, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -108.1025161743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1820755004882812, + "rewards_train/margins": 1.1781761646270752, + "rewards_train/rejected": -3.3602516651153564, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -128.01242065429688, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -226.91404724121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0012420415878296, + "rewards_train/margins": 7.590162873268127, + "rewards_train/rejected": -8.591404914855957, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -253.93475341796875, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -194.4883575439453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.093475341796875, + "rewards_train/margins": -1.4446396827697754, + "rewards_train/rejected": -6.6488356590271, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -18.095861434936523, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -11.64305305480957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40333613753318787, + "rewards_train/margins": 0.46565666794776917, + "rewards_train/rejected": -0.868992805480957, + "step": 752 + }, + { + "epoch": 0.21, + "logps_train/chosen": -91.16783905029297, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -77.87384033203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7167840003967285, + "rewards_train/margins": -0.1043999195098877, + "rewards_train/rejected": -2.612384080886841, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -101.74946594238281, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -17.14118766784668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2749466001987457, + "rewards_train/margins": 0.33292219042778015, + "rewards_train/rejected": -0.6078687906265259, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -153.55508422851562, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -162.51611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.705508708953857, + "rewards_train/margins": 0.6461029052734375, + "rewards_train/rejected": -5.351611614227295, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -109.63404083251953, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -142.58566284179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.513404130935669, + "rewards_train/margins": 1.6451621055603027, + "rewards_train/rejected": -3.1585662364959717, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -137.2690887451172, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -155.55177307128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3269089460372925, + "rewards_train/margins": 3.428268551826477, + "rewards_train/rejected": -4.7551774978637695, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -91.41305541992188, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -210.55810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7413055896759033, + "rewards_train/margins": 8.964504957199097, + "rewards_train/rejected": -10.705810546875, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -3.059486150741577, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -6.61043119430542, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09344861656427383, + "rewards_train/margins": -0.13240549713373184, + "rewards_train/rejected": 0.03895688056945801, + "step": 753 + }, + { + "epoch": 0.21, + "logps_train/chosen": -186.3550262451172, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -204.43055725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.73550271987915, + "rewards_train/margins": 1.7075529098510742, + "rewards_train/rejected": -6.443055629730225, + "step": 753 + }, + { + "epoch": 0.21, + "learning_rate": 1.6733793180026665e-06, + "loss": 0.4407, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -40.2874641418457, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -103.03038024902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1537463665008545, + "rewards_train/margins": -0.40070831775665283, + "rewards_train/rejected": -1.7530380487442017, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -113.17782592773438, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -162.04571533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6177825927734375, + "rewards_train/margins": 5.836789131164551, + "rewards_train/rejected": -7.454571723937988, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -67.49038696289062, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -178.93380737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05096130445599556, + "rewards_train/margins": 7.294342327862978, + "rewards_train/rejected": -7.243381023406982, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -2.634521722793579, + "logps_train/ref_chosen": -1.828125, + "logps_train/ref_rejected": -1.828125, + "logps_train/rejected": -2.635085105895996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08063967525959015, + "rewards_train/margins": 5.6333839893341064e-05, + "rewards_train/rejected": -0.08069600909948349, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -133.16281127929688, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -130.98291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5162811279296875, + "rewards_train/margins": 4.082009792327881, + "rewards_train/rejected": -4.598290920257568, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.755742073059082, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -15.448492050170898, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24901171028614044, + "rewards_train/margins": -0.17916250228881836, + "rewards_train/rejected": -0.06984920799732208, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -9.178343772888184, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -18.240676879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06158437952399254, + "rewards_train/margins": 0.35623330250382423, + "rewards_train/rejected": -0.4178176820278168, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -16.897872924804688, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -29.82928466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12271270900964737, + "rewards_train/margins": 1.549391247332096, + "rewards_train/rejected": -1.4266785383224487, + "step": 754 + }, + { + "epoch": 0.21, + "logps_train/chosen": -95.97738647460938, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -192.4207000732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3977386951446533, + "rewards_train/margins": 5.344331502914429, + "rewards_train/rejected": -7.742070198059082, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -100.66081237792969, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -85.6714859008789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5160812735557556, + "rewards_train/margins": 0.9510673880577087, + "rewards_train/rejected": -1.4671486616134644, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -194.92245483398438, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -210.57884216308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3922455310821533, + "rewards_train/margins": 3.765638589859009, + "rewards_train/rejected": -7.157884120941162, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -7.88249397277832, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -14.270659446716309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4992506206035614, + "rewards_train/margins": 1.5200665891170502, + "rewards_train/rejected": -1.0208159685134888, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.547883033752441, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -12.24061107635498, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2485383003950119, + "rewards_train/margins": 0.5130227953195572, + "rewards_train/rejected": -0.7615610957145691, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -5.5819196701049805, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -19.27045440673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19569197297096252, + "rewards_train/margins": 0.4813534915447235, + "rewards_train/rejected": -0.677045464515686, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -10.772855758666992, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -27.03011703491211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16478557884693146, + "rewards_train/margins": 0.26322613656520844, + "rewards_train/rejected": -0.4280117154121399, + "step": 755 + }, + { + "epoch": 0.21, + "logps_train/chosen": -29.429656982421875, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -23.103090286254883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0554656982421875, + "rewards_train/margins": 0.4235934019088745, + "rewards_train/rejected": -1.479059100151062, + "step": 755 + }, + { + "epoch": 0.21, + "learning_rate": 1.6714211149430267e-06, + "loss": 0.3581, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -18.295875549316406, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -11.633523941040039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4452126026153564, + "rewards_train/margins": -0.7568601965904236, + "rewards_train/rejected": -0.6883524060249329, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -234.61080932617188, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -192.72323608398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.461081027984619, + "rewards_train/margins": -3.1887574195861816, + "rewards_train/rejected": -4.2723236083984375, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -121.18115234375, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -159.42877197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.068115234375, + "rewards_train/margins": 2.8747620582580566, + "rewards_train/rejected": -5.942877292633057, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -131.5923614501953, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -129.80743408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7592360973358154, + "rewards_train/margins": 0.47150731086730957, + "rewards_train/rejected": -3.230743408203125, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -66.12129211425781, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -140.7529754638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0121291875839233, + "rewards_train/margins": 3.8631683588027954, + "rewards_train/rejected": -4.875297546386719, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.072378158569336, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -1.390625, + "logps_train/rejected": -3.4380173683166504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036512184888124466, + "rewards_train/margins": 0.24125142768025398, + "rewards_train/rejected": -0.20473924279212952, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -114.84308624267578, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -167.3533935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.134308815002441, + "rewards_train/margins": 0.6510305404663086, + "rewards_train/rejected": -4.78533935546875, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -124.12329864501953, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -145.12600708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.162329912185669, + "rewards_train/margins": 2.900270700454712, + "rewards_train/rejected": -5.062600612640381, + "step": 756 + }, + { + "epoch": 0.21, + "logps_train/chosen": -12.851542472839355, + "logps_train/ref_chosen": -0.76171875, + "logps_train/ref_rejected": -0.76171875, + "logps_train/rejected": -13.353317260742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2089823484420776, + "rewards_train/margins": 0.050177574157714844, + "rewards_train/rejected": -1.2591599225997925, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -200.48495483398438, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -167.21661376953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.948495388031006, + "rewards_train/margins": -0.07683372497558594, + "rewards_train/rejected": -5.87166166305542, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.905441284179688, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -24.577011108398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6655441522598267, + "rewards_train/margins": 0.6921570301055908, + "rewards_train/rejected": -1.3577011823654175, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -16.637605667114258, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -23.581172943115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4637605845928192, + "rewards_train/margins": 0.019356727600097656, + "rewards_train/rejected": -0.48311731219291687, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -84.497314453125, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -109.23114776611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14973144233226776, + "rewards_train/margins": 0.3733833581209183, + "rewards_train/rejected": -0.523114800453186, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -150.00906372070312, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -205.7381134033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.250906467437744, + "rewards_train/margins": 6.57290506362915, + "rewards_train/rejected": -10.823811531066895, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -16.442981719970703, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -38.25177001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5005481839179993, + "rewards_train/margins": 1.1246288418769836, + "rewards_train/rejected": -1.625177025794983, + "step": 757 + }, + { + "epoch": 0.21, + "logps_train/chosen": -102.12562561035156, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -143.13113403320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4125625789165497, + "rewards_train/margins": 2.000550776720047, + "rewards_train/rejected": -2.4131133556365967, + "step": 757 + }, + { + "epoch": 0.21, + "learning_rate": 1.6694582126474503e-06, + "loss": 0.5879, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -12.803265571594238, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -24.643512725830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23217344284057617, + "rewards_train/margins": 1.315274715423584, + "rewards_train/rejected": -1.0831012725830078, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -22.517271041870117, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -29.457061767578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3392271101474762, + "rewards_train/margins": -0.3185209333896637, + "rewards_train/rejected": -0.0207061767578125, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -149.69630432128906, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -73.49002075195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3696305751800537, + "rewards_train/margins": 0.4793715476989746, + "rewards_train/rejected": -3.8490021228790283, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -7.048877239227295, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -4.149934768676758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2736377418041229, + "rewards_train/margins": -0.33989426493644714, + "rewards_train/rejected": 0.06625652313232422, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -64.2044448852539, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -237.49801635742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7954444885253906, + "rewards_train/margins": 10.75435733795166, + "rewards_train/rejected": -11.54980182647705, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -32.458717346191406, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -3.312746047973633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1458717584609985, + "rewards_train/margins": -1.0536596551537514, + "rewards_train/rejected": -0.09221210330724716, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -106.58985900878906, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -168.37210083007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4089858531951904, + "rewards_train/margins": 4.12822413444519, + "rewards_train/rejected": -7.537209987640381, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -73.10157775878906, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -109.20072937011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6101577877998352, + "rewards_train/margins": 0.6599151492118835, + "rewards_train/rejected": -1.2700729370117188, + "step": 758 + }, + { + "epoch": 0.21, + "logps_train/chosen": -8.186074256896973, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -11.015488624572754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.30610743165016174, + "rewards_train/margins": -0.0420585572719574, + "rewards_train/rejected": -0.26404887437820435, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -122.69622802734375, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -226.6669464111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.919622778892517, + "rewards_train/margins": 5.3470717668533325, + "rewards_train/rejected": -7.26669454574585, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -119.84295654296875, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -92.7952651977539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13429565727710724, + "rewards_train/margins": 2.245230957865715, + "rewards_train/rejected": -2.3795266151428223, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.037759304046631, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -16.107139587402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17408843338489532, + "rewards_train/margins": 0.5741255134344101, + "rewards_train/rejected": -0.7482139468193054, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -16.755451202392578, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -14.585233688354492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15679512917995453, + "rewards_train/margins": -0.3607717603445053, + "rewards_train/rejected": 0.20397663116455078, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -25.191558837890625, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -36.83051681518555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48165589570999146, + "rewards_train/margins": 1.1263957619667053, + "rewards_train/rejected": -1.6080516576766968, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -8.05408000946045, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -33.56629180908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24290800094604492, + "rewards_train/margins": 1.3387211561203003, + "rewards_train/rejected": -1.5816291570663452, + "step": 759 + }, + { + "epoch": 0.21, + "logps_train/chosen": -26.18960189819336, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -27.755001068115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48146018385887146, + "rewards_train/margins": 0.10653993487358093, + "rewards_train/rejected": -0.5880001187324524, + "step": 759 + }, + { + "epoch": 0.21, + "learning_rate": 1.6674906248541724e-06, + "loss": 0.4723, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -19.11569595336914, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -39.57547378540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6553196310997009, + "rewards_train/margins": 1.327227771282196, + "rewards_train/rejected": -1.982547402381897, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -31.076034545898438, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -96.64942932128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3326034545898438, + "rewards_train/margins": 2.032339572906494, + "rewards_train/rejected": -3.364943027496338, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -58.29077911376953, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -85.6080322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4290779232978821, + "rewards_train/margins": 0.531725287437439, + "rewards_train/rejected": -0.960803210735321, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -107.95094299316406, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -107.60928344726562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4450943171977997, + "rewards_train/margins": -0.03416597843170166, + "rewards_train/rejected": -0.410928338766098, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.02560043334961, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -8.333641052246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20993995666503906, + "rewards_train/margins": 0.7276790738105774, + "rewards_train/rejected": -0.5177391171455383, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -1.1858769655227661, + "logps_train/ref_chosen": -0.65625, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -12.499124526977539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05296269804239273, + "rewards_train/margins": 1.025074802339077, + "rewards_train/rejected": -1.0780375003814697, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.478153228759766, + "logps_train/ref_chosen": -3.203125, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -11.176889419555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12750282883644104, + "rewards_train/margins": 0.6464361250400543, + "rewards_train/rejected": -0.7739389538764954, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -6.939768314361572, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -17.02654266357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08727317303419113, + "rewards_train/margins": 0.11492743901908398, + "rewards_train/rejected": -0.027654265984892845, + "step": 760 + }, + { + "epoch": 0.21, + "logps_train/chosen": -86.4583740234375, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -130.18966674804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.654162585735321, + "rewards_train/margins": 1.9231292605400085, + "rewards_train/rejected": -1.2689666748046875, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -19.982505798339844, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -33.92414474487305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8857505917549133, + "rewards_train/margins": 0.7566638588905334, + "rewards_train/rejected": -1.6424144506454468, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -145.2567138671875, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -252.5833282470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8256714344024658, + "rewards_train/margins": 6.932661771774292, + "rewards_train/rejected": -8.758333206176758, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -6.741292476654053, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -3.5261569023132324, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.30069175362586975, + "rewards_train/margins": -0.3387010656297207, + "rewards_train/rejected": 0.03800931200385094, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -140.4098358154297, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -159.35250854492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8409836292266846, + "rewards_train/margins": 0.49426722526550293, + "rewards_train/rejected": -3.3352508544921875, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -76.0699691772461, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -80.53834533691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8569968938827515, + "rewards_train/margins": 0.646837592124939, + "rewards_train/rejected": -2.5038344860076904, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -41.338321685791016, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -158.80307006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6838321685791016, + "rewards_train/margins": 3.2964749336242676, + "rewards_train/rejected": -3.980307102203369, + "step": 761 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.5254396200180054, + "logps_train/ref_chosen": -1.9140625, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -9.104480743408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13886229693889618, + "rewards_train/margins": 0.539935365319252, + "rewards_train/rejected": -0.40107306838035583, + "step": 761 + }, + { + "epoch": 0.21, + "learning_rate": 1.6655183653342216e-06, + "loss": 0.3801, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -0.04107356444001198, + "logps_train/ref_chosen": -0.091796875, + "logps_train/ref_rejected": -0.091796875, + "logps_train/rejected": -0.042205408215522766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005072331055998802, + "rewards_train/margins": 0.00011318409815430641, + "rewards_train/rejected": 0.004959146957844496, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -34.35472106933594, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -64.27143096923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0479720830917358, + "rewards_train/margins": 1.954171061515808, + "rewards_train/rejected": -3.002143144607544, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -107.28067779541016, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -144.41287231445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5780677795410156, + "rewards_train/margins": 2.5132195949554443, + "rewards_train/rejected": -3.09128737449646, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.645731925964355, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -29.337621688842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03957319259643555, + "rewards_train/margins": 1.0441889762878418, + "rewards_train/rejected": -1.0837621688842773, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -209.15609741210938, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -151.2261962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.5156097412109375, + "rewards_train/margins": -0.9929900169372559, + "rewards_train/rejected": -3.5226197242736816, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -2.758286952972412, + "logps_train/ref_chosen": -1.9921875, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -35.41812515258789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07660994678735733, + "rewards_train/margins": 0.09020257741212845, + "rewards_train/rejected": -0.16681252419948578, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -24.286865234375, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -23.447975158691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8411865234375, + "rewards_train/margins": -0.14638900756835938, + "rewards_train/rejected": -0.6947975158691406, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -96.67604064941406, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -81.89947509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.43239593505859375, + "rewards_train/margins": 1.5223435163497925, + "rewards_train/rejected": -1.0899475812911987, + "step": 762 + }, + { + "epoch": 0.21, + "logps_train/chosen": -9.598718643188477, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -42.07243347167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43799686431884766, + "rewards_train/margins": 1.031746506690979, + "rewards_train/rejected": -1.4697433710098267, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -69.40833282470703, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -24.170930862426758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2658333778381348, + "rewards_train/margins": -1.0424902439117432, + "rewards_train/rejected": -1.2233431339263916, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -131.8023681640625, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -124.24039459228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.980236828327179, + "rewards_train/margins": 1.8438026309013367, + "rewards_train/rejected": -2.8240394592285156, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -241.89967346191406, + "logps_train/ref_chosen": -234.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -230.51161193847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7899673581123352, + "rewards_train/margins": 6.261193931102753, + "rewards_train/rejected": -7.051161289215088, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -127.76016998291016, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -195.03253173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7760169506073, + "rewards_train/margins": 5.427236795425415, + "rewards_train/rejected": -8.203253746032715, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -10.066246032714844, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -9.878506660461426, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41287460923194885, + "rewards_train/margins": -0.018773943185806274, + "rewards_train/rejected": -0.3941006660461426, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -153.3550567626953, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -115.7256851196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2355058193206787, + "rewards_train/margins": 0.8870627880096436, + "rewards_train/rejected": -3.1225686073303223, + "step": 763 + }, + { + "epoch": 0.21, + "logps_train/chosen": -3.726361036300659, + "logps_train/ref_chosen": -2.359375, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -31.702381134033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13669860363006592, + "rewards_train/margins": 0.8835395574569702, + "rewards_train/rejected": -1.0202381610870361, + "step": 763 + }, + { + "epoch": 0.21, + "learning_rate": 1.663541447891323e-06, + "loss": 0.458, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -61.19711685180664, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -175.27420043945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1447117328643799, + "rewards_train/margins": 6.282708406448364, + "rewards_train/rejected": -7.427420139312744, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -13.811380386352539, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -24.455995559692383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14363804459571838, + "rewards_train/margins": 1.326961487531662, + "rewards_train/rejected": -1.4705995321273804, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -144.49838256835938, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -155.04905700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8998382091522217, + "rewards_train/margins": 2.555067777633667, + "rewards_train/rejected": -5.454905986785889, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -135.63662719726562, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -197.22535705566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.8636627197265625, + "rewards_train/margins": 3.5088729858398438, + "rewards_train/rejected": -9.372535705566406, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -17.631120681762695, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -16.736684799194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2506120800971985, + "rewards_train/margins": 0.7543063759803772, + "rewards_train/rejected": -1.0049184560775757, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -29.50388526916504, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -21.983840942382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2503885328769684, + "rewards_train/margins": 1.4448705613613129, + "rewards_train/rejected": -1.6952590942382812, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -96.9492416381836, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -189.15655517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.194924235343933, + "rewards_train/margins": 5.42073118686676, + "rewards_train/rejected": -6.615655422210693, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -20.29804801940918, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -20.19808006286621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10480480641126633, + "rewards_train/margins": 0.7337532117962837, + "rewards_train/rejected": -0.83855801820755, + "step": 764 + }, + { + "epoch": 0.21, + "logps_train/chosen": -15.90562915802002, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -47.35005187988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.690562903881073, + "rewards_train/margins": 0.1944422721862793, + "rewards_train/rejected": -0.8850051760673523, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -27.33463478088379, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -61.789649963378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42096349596977234, + "rewards_train/margins": 2.3580015003681183, + "rewards_train/rejected": -2.7789649963378906, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -86.31858825683594, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -47.38877868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1818588227033615, + "rewards_train/margins": 0.08201904594898224, + "rewards_train/rejected": -0.26387786865234375, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -232.22694396972656, + "logps_train/ref_chosen": -208.0, + "logps_train/ref_rejected": -200.0, + "logps_train/rejected": -240.98043823242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.422694444656372, + "rewards_train/margins": 1.675349473953247, + "rewards_train/rejected": -4.098043918609619, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -121.37196350097656, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -142.45321655273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0871963500976562, + "rewards_train/margins": 3.208125591278076, + "rewards_train/rejected": -5.295321941375732, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -9.840624809265137, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -16.06869888305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5559374690055847, + "rewards_train/margins": 0.5321823954582214, + "rewards_train/rejected": -1.0881198644638062, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -368.8593444824219, + "logps_train/ref_chosen": -228.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -275.6451416015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -14.08593463897705, + "rewards_train/margins": -1.2214202880859375, + "rewards_train/rejected": -12.864514350891113, + "step": 765 + }, + { + "epoch": 0.21, + "logps_train/chosen": -1.3292871713638306, + "logps_train/ref_chosen": -1.65625, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -5.145087242126465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03269628435373306, + "rewards_train/margins": -0.14029499143362045, + "rewards_train/rejected": 0.17299127578735352, + "step": 765 + }, + { + "epoch": 0.21, + "learning_rate": 1.661559886361803e-06, + "loss": 0.3498, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -159.98782348632812, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -159.24888610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.098782539367676, + "rewards_train/margins": 0.2761063575744629, + "rewards_train/rejected": -6.374888896942139, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -99.68382263183594, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -187.6405029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5683822631835938, + "rewards_train/margins": 1.3956680297851562, + "rewards_train/rejected": -4.96405029296875, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -23.36273956298828, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -28.358667373657227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2800239324569702, + "rewards_train/margins": 0.12459278106689453, + "rewards_train/rejected": -1.4046167135238647, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -134.58396911621094, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -153.173095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8583968877792358, + "rewards_train/margins": 3.508912682533264, + "rewards_train/rejected": -5.3673095703125, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -29.488819122314453, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -49.01015853881836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15111808478832245, + "rewards_train/margins": 2.0521339625120163, + "rewards_train/rejected": -1.9010158777236938, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -80.66229248046875, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -89.55938720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.166229248046875, + "rewards_train/margins": 0.4897094964981079, + "rewards_train/rejected": -0.6559387445449829, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -33.411808013916016, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -18.808074951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36618080735206604, + "rewards_train/margins": 0.8677516877651215, + "rewards_train/rejected": -1.2339324951171875, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -80.00383758544922, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -49.329978942871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8503837585449219, + "rewards_train/margins": 1.3701140880584717, + "rewards_train/rejected": -2.2204978466033936, + "step": 766 + }, + { + "epoch": 0.21, + "logps_train/chosen": -172.14486694335938, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -201.123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8144867420196533, + "rewards_train/margins": 0.897817850112915, + "rewards_train/rejected": -4.712304592132568, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -8.892325401306152, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -12.807686805725098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18923254311084747, + "rewards_train/margins": 0.3696611374616623, + "rewards_train/rejected": -0.5588936805725098, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -21.804851531982422, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -26.613414764404297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6367351412773132, + "rewards_train/margins": -0.17539367079734802, + "rewards_train/rejected": -0.4613414704799652, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -102.93260192871094, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -119.72676849365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8932602405548096, + "rewards_train/margins": 2.329416513442993, + "rewards_train/rejected": -5.222676753997803, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -11.262761116027832, + "logps_train/ref_chosen": -1.1875, + "logps_train/ref_rejected": -0.44140625, + "logps_train/rejected": -13.515527725219727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.007526159286499, + "rewards_train/margins": 0.29988598823547363, + "rewards_train/rejected": -1.3074121475219727, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -67.64607238769531, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -119.37849426269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3146072328090668, + "rewards_train/margins": 2.323242336511612, + "rewards_train/rejected": -2.6378495693206787, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -7.063769340515137, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -17.439395904541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3876269459724426, + "rewards_train/margins": 0.3938126564025879, + "rewards_train/rejected": -0.7814396023750305, + "step": 767 + }, + { + "epoch": 0.21, + "logps_train/chosen": -12.035346031188965, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -38.13368225097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.258965402841568, + "rewards_train/margins": 0.9973336160182953, + "rewards_train/rejected": -0.7383682131767273, + "step": 767 + }, + { + "epoch": 0.21, + "learning_rate": 1.6595736946144915e-06, + "loss": 0.3653, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.608023643493652, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -1.609375, + "logps_train/rejected": -3.387352228164673, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21236486732959747, + "rewards_train/margins": -0.03456714749336243, + "rewards_train/rejected": -0.17779771983623505, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -170.477294921875, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -224.45086669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4477295875549316, + "rewards_train/margins": 4.997357368469238, + "rewards_train/rejected": -7.44508695602417, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -96.61090087890625, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -170.08407592773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9110901355743408, + "rewards_train/margins": 1.2973175048828125, + "rewards_train/rejected": -3.2084076404571533, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -77.38982391357422, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -91.85281372070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3639824390411377, + "rewards_train/margins": 1.0712990760803223, + "rewards_train/rejected": -2.43528151512146, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -2.048459529876709, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -10.91491985321045, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10140404850244522, + "rewards_train/margins": 0.3803960457444191, + "rewards_train/rejected": -0.2789919972419739, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -8.65538501739502, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -20.75193214416504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.31866350769996643, + "rewards_train/margins": -0.005970299243927002, + "rewards_train/rejected": -0.31269320845603943, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -4.861186504364014, + "logps_train/ref_chosen": -1.2421875, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -11.363645553588867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3618999123573303, + "rewards_train/margins": -0.2317853569984436, + "rewards_train/rejected": -0.13011455535888672, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -184.7803955078125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -208.77154541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.278039455413818, + "rewards_train/margins": 1.19911527633667, + "rewards_train/rejected": -7.477154731750488, + "step": 768 + }, + { + "epoch": 0.21, + "logps_train/chosen": -70.63807678222656, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -114.53974151611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6388076543807983, + "rewards_train/margins": 1.0651665925979614, + "rewards_train/rejected": -2.7039742469787598, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -6.269756317138672, + "logps_train/ref_chosen": -0.024169921875, + "logps_train/ref_rejected": -0.024169921875, + "logps_train/rejected": -6.519262313842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6245586276054382, + "rewards_train/margins": 0.024950623512268066, + "rewards_train/rejected": -0.6495092511177063, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -212.91342163085938, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -255.0, + "logps_train/rejected": -313.66796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8913421630859375, + "rewards_train/margins": 2.975454807281494, + "rewards_train/rejected": -5.866796970367432, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -91.6971435546875, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -149.90185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7197144031524658, + "rewards_train/margins": 2.370471239089966, + "rewards_train/rejected": -4.090185642242432, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -64.63433837890625, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -87.09690856933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.738433837890625, + "rewards_train/margins": -0.5787429809570312, + "rewards_train/rejected": -0.15969085693359375, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -138.2818603515625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -159.546142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.428185939788818, + "rewards_train/margins": 0.9764285087585449, + "rewards_train/rejected": -5.404614448547363, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -192.8133544921875, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -199.483154296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.881335735321045, + "rewards_train/margins": -0.13302040100097656, + "rewards_train/rejected": -5.748315334320068, + "step": 769 + }, + { + "epoch": 0.21, + "logps_train/chosen": -1.7128201723098755, + "logps_train/ref_chosen": -0.55859375, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -14.594688415527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11542264372110367, + "rewards_train/margins": 1.1049837693572044, + "rewards_train/rejected": -1.220406413078308, + "step": 769 + }, + { + "epoch": 0.22, + "learning_rate": 1.6575828865506246e-06, + "loss": 0.4409, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -11.241500854492188, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -33.2930908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3929000794887543, + "rewards_train/margins": 0.37390902638435364, + "rewards_train/rejected": -0.7668091058731079, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -110.58604431152344, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -120.54753112792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6086044311523438, + "rewards_train/margins": 0.04614865779876709, + "rewards_train/rejected": -1.6547530889511108, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -158.42918395996094, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -146.73455810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7429184317588806, + "rewards_train/margins": 0.4305374026298523, + "rewards_train/rejected": -1.173455834388733, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -148.86273193359375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -215.04730224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8862732648849487, + "rewards_train/margins": 5.518457055091858, + "rewards_train/rejected": -7.404730319976807, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -48.35236740112305, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -27.460891723632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5147632956504822, + "rewards_train/margins": 2.235852539539337, + "rewards_train/rejected": -1.721089243888855, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -100.08329772949219, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -116.80350494384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1583297699689865, + "rewards_train/margins": 2.9220208674669266, + "rewards_train/rejected": -3.080350637435913, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -17.343326568603516, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -49.162864685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9280826449394226, + "rewards_train/margins": 2.3382039666175842, + "rewards_train/rejected": -3.266286611557007, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -136.29336547851562, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -178.46975708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.479336738586426, + "rewards_train/margins": 3.26763916015625, + "rewards_train/rejected": -7.746975898742676, + "step": 770 + }, + { + "epoch": 0.22, + "logps_train/chosen": -147.37384033203125, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -134.7440948486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.237384080886841, + "rewards_train/margins": 3.5870254039764404, + "rewards_train/rejected": -5.824409484863281, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -125.63973999023438, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -131.66497802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4639739990234375, + "rewards_train/margins": 1.5525238513946533, + "rewards_train/rejected": -3.016497850418091, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -21.22087860107422, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -73.2227783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6345878839492798, + "rewards_train/margins": 0.13768994808197021, + "rewards_train/rejected": -0.77227783203125, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -58.05297088623047, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -64.43829345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.380297064781189, + "rewards_train/margins": 0.7135323286056519, + "rewards_train/rejected": -2.093829393386841, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -5.581045150756836, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -22.57347869873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07060451805591583, + "rewards_train/margins": 0.78049336373806, + "rewards_train/rejected": -0.8510978817939758, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -118.8398666381836, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -146.13919067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06601333618164062, + "rewards_train/margins": 3.2799324989318848, + "rewards_train/rejected": -3.213919162750244, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -72.95468139648438, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -160.62059020996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.395468145608902, + "rewards_train/margins": 2.7665909230709076, + "rewards_train/rejected": -3.1620590686798096, + "step": 771 + }, + { + "epoch": 0.22, + "logps_train/chosen": -51.689300537109375, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -131.11917114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0810699462890625, + "rewards_train/margins": 0.29298706352710724, + "rewards_train/rejected": -0.21191711723804474, + "step": 771 + }, + { + "epoch": 0.22, + "learning_rate": 1.655587476103748e-06, + "loss": 0.2657, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.625494003295898, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -11.596296310424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3125494122505188, + "rewards_train/margins": 0.21583020687103271, + "rewards_train/rejected": -0.5283796191215515, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -24.491695404052734, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -44.868247985839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0366695411503315, + "rewards_train/margins": 2.2501554004848003, + "rewards_train/rejected": -2.286824941635132, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -32.93836212158203, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -2.3125, + "logps_train/rejected": -12.301960945129395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19383621215820312, + "rewards_train/margins": 0.8051099181175232, + "rewards_train/rejected": -0.9989461302757263, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -302.7785949707031, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -264.0063781738281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.777859687805176, + "rewards_train/margins": -0.7772216796875, + "rewards_train/rejected": -12.000638008117676, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -113.16732025146484, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -178.14732360839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7167320251464844, + "rewards_train/margins": 0.5980005264282227, + "rewards_train/rejected": -4.314732551574707, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -93.21090698242188, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -79.77567291259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2710907459259033, + "rewards_train/margins": -0.44352346658706665, + "rewards_train/rejected": -0.8275672793388367, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -9.843883514404297, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -32.576725006103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1781383603811264, + "rewards_train/margins": 0.6420341283082962, + "rewards_train/rejected": -0.8201724886894226, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -24.79198455810547, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -69.38525390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5291985273361206, + "rewards_train/margins": -0.2656731605529785, + "rewards_train/rejected": -1.263525366783142, + "step": 772 + }, + { + "epoch": 0.22, + "logps_train/chosen": -26.08358383178711, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -26.33212661743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.870858371257782, + "rewards_train/margins": 0.7061043381690979, + "rewards_train/rejected": -1.5769627094268799, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -17.346363067626953, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -94.07302856445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20963631570339203, + "rewards_train/margins": 1.0476665645837784, + "rewards_train/rejected": -1.2573028802871704, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -6.58818244934082, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -7.127063274383545, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2661817669868469, + "rewards_train/margins": 0.7382631003856659, + "rewards_train/rejected": -0.47208133339881897, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -71.34921264648438, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -99.98825073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5150787234306335, + "rewards_train/margins": 2.763903796672821, + "rewards_train/rejected": -2.2488250732421875, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -77.69277954101562, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -153.5913543701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0692780017852783, + "rewards_train/margins": 2.1898574829101562, + "rewards_train/rejected": -3.2591354846954346, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -13.99227523803711, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -36.14284896850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15702247619628906, + "rewards_train/margins": 1.496307373046875, + "rewards_train/rejected": -1.339284896850586, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -9.135732650756836, + "logps_train/ref_chosen": -0.62890625, + "logps_train/ref_rejected": -1.5078125, + "logps_train/rejected": -9.01306438446045, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8506826758384705, + "rewards_train/margins": -0.10015749931335449, + "rewards_train/rejected": -0.750525176525116, + "step": 773 + }, + { + "epoch": 0.22, + "logps_train/chosen": -146.2921142578125, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -186.21041870117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.079211711883545, + "rewards_train/margins": 2.241830348968506, + "rewards_train/rejected": -8.32104206085205, + "step": 773 + }, + { + "epoch": 0.22, + "learning_rate": 1.6535874772396192e-06, + "loss": 0.4468, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -134.8090057373047, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -218.77098083496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.780900478363037, + "rewards_train/margins": 4.446197986602783, + "rewards_train/rejected": -9.22709846496582, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -114.5177001953125, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -122.41263580322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.25177001953125, + "rewards_train/margins": 2.589493751525879, + "rewards_train/rejected": -5.841263771057129, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -10.862138748168945, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -14.776602745056152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07996387779712677, + "rewards_train/margins": 0.4351963847875595, + "rewards_train/rejected": -0.5151602625846863, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -5.514636993408203, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -7.104140281677246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28271371126174927, + "rewards_train/margins": 0.07457533478736877, + "rewards_train/rejected": -0.35728904604911804, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -82.06207275390625, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -82.98656463623047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6062073111534119, + "rewards_train/margins": -0.1575508415699005, + "rewards_train/rejected": -0.44865646958351135, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -174.34375, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -179.05087280273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8343750238418579, + "rewards_train/margins": 3.570712447166443, + "rewards_train/rejected": -4.405087471008301, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -201.90750122070312, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -216.927978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.040750503540039, + "rewards_train/margins": 0.3020477294921875, + "rewards_train/rejected": -9.342798233032227, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -4.318026065826416, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -7.298829078674316, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05367760732769966, + "rewards_train/margins": 0.14183030650019646, + "rewards_train/rejected": -0.19550791382789612, + "step": 774 + }, + { + "epoch": 0.22, + "logps_train/chosen": -156.37269592285156, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -266.4296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.83726966381073, + "rewards_train/margins": 8.805699467658997, + "rewards_train/rejected": -10.642969131469727, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -36.42322540283203, + "logps_train/ref_chosen": -31.625, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -55.88883972167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4798225462436676, + "rewards_train/margins": 2.259061425924301, + "rewards_train/rejected": -2.7388839721679688, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -145.9365234375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -165.6474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.99365234375, + "rewards_train/margins": 0.07109379768371582, + "rewards_train/rejected": -3.064746141433716, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -79.52393341064453, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -78.31405639648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6523933410644531, + "rewards_train/margins": -0.12098771333694458, + "rewards_train/rejected": -0.5314056277275085, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -68.73407745361328, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -100.23834228515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.423407793045044, + "rewards_train/margins": -0.7495735287666321, + "rewards_train/rejected": -0.6738342642784119, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -82.05598449707031, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -120.94379425048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.155598521232605, + "rewards_train/margins": 1.2387808561325073, + "rewards_train/rejected": -2.3943793773651123, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -217.80429077148438, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -225.9814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.5804290771484375, + "rewards_train/margins": 1.3177156448364258, + "rewards_train/rejected": -7.898144721984863, + "step": 775 + }, + { + "epoch": 0.22, + "logps_train/chosen": -20.20174789428711, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -19.651268005371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18267479538917542, + "rewards_train/margins": 0.763702005147934, + "rewards_train/rejected": -0.9463768005371094, + "step": 775 + }, + { + "epoch": 0.22, + "learning_rate": 1.6515829039561102e-06, + "loss": 0.4215, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -124.24736022949219, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -147.3650665283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22526398301124573, + "rewards_train/margins": 0.3617706447839737, + "rewards_train/rejected": -0.13650666177272797, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.513090133666992, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -13.878335952758789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6700590252876282, + "rewards_train/margins": 0.2208995819091797, + "rewards_train/rejected": -0.8909586071968079, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -121.87178802490234, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -182.48309326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8871787786483765, + "rewards_train/margins": 5.56113064289093, + "rewards_train/rejected": -7.448309421539307, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -98.30127716064453, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -156.04891967773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.98012775182724, + "rewards_train/margins": 2.824764311313629, + "rewards_train/rejected": -3.804892063140869, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.864324569702148, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -15.930121421813965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0739324614405632, + "rewards_train/margins": 0.17532967776060104, + "rewards_train/rejected": -0.24926213920116425, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -17.660905838012695, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -34.77031707763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3723405599594116, + "rewards_train/margins": 0.279691219329834, + "rewards_train/rejected": -1.6520317792892456, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -2.532884120941162, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -9.077857971191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04983658716082573, + "rewards_train/margins": 0.5044973902404308, + "rewards_train/rejected": -0.4546608030796051, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -135.83303833007812, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -243.42279052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.03330397605896, + "rewards_train/margins": 6.108975648880005, + "rewards_train/rejected": -9.142279624938965, + "step": 776 + }, + { + "epoch": 0.22, + "logps_train/chosen": -152.7227020263672, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -189.7119598388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6722701787948608, + "rewards_train/margins": 2.3989259004592896, + "rewards_train/rejected": -4.07119607925415, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -15.076192855834961, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -19.766258239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15136928856372833, + "rewards_train/margins": 1.1408815830945969, + "rewards_train/rejected": -1.2922508716583252, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -25.02703857421875, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -23.823823928833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37770387530326843, + "rewards_train/margins": 0.9171785414218903, + "rewards_train/rejected": -1.2948824167251587, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -10.396812438964844, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -1.625, + "logps_train/rejected": -11.683395385742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5021812319755554, + "rewards_train/margins": 0.5036583542823792, + "rewards_train/rejected": -1.0058395862579346, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.446813583374023, + "logps_train/ref_chosen": -0.73046875, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -21.783000946044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3716344833374023, + "rewards_train/margins": 0.4613531827926636, + "rewards_train/rejected": -1.832987666130066, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -22.140377044677734, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -23.904767990112305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3202877044677734, + "rewards_train/margins": 0.09518909454345703, + "rewards_train/rejected": -1.4154767990112305, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -10.914020538330078, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -13.387449264526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09140205383300781, + "rewards_train/margins": 0.1973428726196289, + "rewards_train/rejected": -0.2887449264526367, + "step": 777 + }, + { + "epoch": 0.22, + "logps_train/chosen": -67.74385833740234, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -230.0, + "logps_train/rejected": -291.02215576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3243858814239502, + "rewards_train/margins": 4.777829885482788, + "rewards_train/rejected": -6.102215766906738, + "step": 777 + }, + { + "epoch": 0.22, + "learning_rate": 1.649573770283108e-06, + "loss": 0.3589, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -165.224609375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -207.6894073486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5224609375, + "rewards_train/margins": 2.646480083465576, + "rewards_train/rejected": -6.168941020965576, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -82.72514343261719, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -116.00947570800781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.47748565673828125, + "rewards_train/margins": -0.42156678438186646, + "rewards_train/rejected": 0.8990524411201477, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -97.70213317871094, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -212.14361572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6702133417129517, + "rewards_train/margins": 8.044148802757263, + "rewards_train/rejected": -9.714362144470215, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.621589660644531, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -29.375, + "logps_train/rejected": -41.9830436706543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8340339660644531, + "rewards_train/margins": 0.4267704486846924, + "rewards_train/rejected": -1.2608044147491455, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -2.210855722427368, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -3.1010727882385254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0999918207526207, + "rewards_train/margins": -0.09613454178906977, + "rewards_train/rejected": -0.0038572789635509253, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -164.15313720703125, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -221.4527587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.415313720703125, + "rewards_train/margins": 3.8299622535705566, + "rewards_train/rejected": -5.245275974273682, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -57.45040512084961, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -44.92235565185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6950405836105347, + "rewards_train/margins": 1.315945029258728, + "rewards_train/rejected": -3.0109856128692627, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -113.15487670898438, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -67.9300308227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.115487813949585, + "rewards_train/margins": 0.47751522064208984, + "rewards_train/rejected": -2.593003034591675, + "step": 778 + }, + { + "epoch": 0.22, + "logps_train/chosen": -117.91692352294922, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -115.61155700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5916923880577087, + "rewards_train/margins": 2.2194634079933167, + "rewards_train/rejected": -2.8111557960510254, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -172.54611206054688, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -243.26287841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.654611349105835, + "rewards_train/margins": 5.271676778793335, + "rewards_train/rejected": -7.92628812789917, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -144.91409301757812, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -228.90292358398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7414093017578125, + "rewards_train/margins": 2.64888334274292, + "rewards_train/rejected": -4.390292644500732, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -177.47171020507812, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -133.2349395751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9471710324287415, + "rewards_train/margins": 0.9763229489326477, + "rewards_train/rejected": -1.9234939813613892, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -103.12750244140625, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -101.76251983642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0627503395080566, + "rewards_train/margins": 0.2635016441345215, + "rewards_train/rejected": -3.326251983642578, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -112.44921875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -148.3189239501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.644921898841858, + "rewards_train/margins": 1.5869704484939575, + "rewards_train/rejected": -3.2318923473358154, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -157.71377563476562, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -201.48556518554688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.971377849578857, + "rewards_train/margins": -0.22282123565673828, + "rewards_train/rejected": -5.748556613922119, + "step": 779 + }, + { + "epoch": 0.22, + "logps_train/chosen": -135.45947265625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -136.82852172851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04594726487994194, + "rewards_train/margins": 0.13690491393208504, + "rewards_train/rejected": -0.18285217881202698, + "step": 779 + }, + { + "epoch": 0.22, + "learning_rate": 1.6475600902824188e-06, + "loss": 0.3545, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -204.29241943359375, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -216.39117431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.729241847991943, + "rewards_train/margins": 2.759875774383545, + "rewards_train/rejected": -9.489117622375488, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.768760681152344, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -3.662749767303467, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23937606811523438, + "rewards_train/margins": -0.16841358691453934, + "rewards_train/rejected": -0.07096248120069504, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -178.34942626953125, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -260.2745666503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.434942722320557, + "rewards_train/margins": 6.792514324188232, + "rewards_train/rejected": -11.227457046508789, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -5.617903232574463, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -15.1436767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21633468568325043, + "rewards_train/margins": 0.574452355504036, + "rewards_train/rejected": -0.3581176698207855, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -164.42198181152344, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -137.6050567626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0421981811523438, + "rewards_train/margins": 1.6183075904846191, + "rewards_train/rejected": -3.660505771636963, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -114.40115356445312, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -123.96432495117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2901153564453125, + "rewards_train/margins": 0.3063170909881592, + "rewards_train/rejected": -2.5964324474334717, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -132.888427734375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -189.6288604736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7888427972793579, + "rewards_train/margins": 6.174043536186218, + "rewards_train/rejected": -6.962886333465576, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -3.546759843826294, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -6.442531585693359, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01092598494142294, + "rewards_train/margins": 0.20520217064768076, + "rewards_train/rejected": -0.2161281555891037, + "step": 780 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.83061408996582, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -18.940702438354492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11431141197681427, + "rewards_train/margins": 0.7297588437795639, + "rewards_train/rejected": -0.8440702557563782, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -184.56015014648438, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -141.6510772705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5560150146484375, + "rewards_train/margins": 1.5090928077697754, + "rewards_train/rejected": -3.065107822418213, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -19.6944637298584, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -69.66574096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7131963968276978, + "rewards_train/margins": 2.903377652168274, + "rewards_train/rejected": -3.6165740489959717, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -155.15585327148438, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -209.0, + "logps_train/rejected": -249.6116943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1155853271484375, + "rewards_train/margins": 2.945584297180176, + "rewards_train/rejected": -4.061169624328613, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -13.759599685668945, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -39.207969665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30720996856689453, + "rewards_train/margins": 2.657336950302124, + "rewards_train/rejected": -2.9645469188690186, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -30.8776912689209, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -61.69833755493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15026913583278656, + "rewards_train/margins": 0.24456463754177094, + "rewards_train/rejected": -0.3948337733745575, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -261.15789794921875, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -207.85791015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.91579008102417, + "rewards_train/margins": -1.22999906539917, + "rewards_train/rejected": -4.685791015625, + "step": 781 + }, + { + "epoch": 0.22, + "logps_train/chosen": -161.5487823486328, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -147.68714904785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3548781871795654, + "rewards_train/margins": 3.1138370037078857, + "rewards_train/rejected": -6.468715190887451, + "step": 781 + }, + { + "epoch": 0.22, + "learning_rate": 1.6455418780476672e-06, + "loss": 0.3434, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -239.64776611328125, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -241.12936401367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.864776611328125, + "rewards_train/margins": 2.7481603622436523, + "rewards_train/rejected": -9.612936973571777, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -16.713245391845703, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -30.724088668823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35882455110549927, + "rewards_train/margins": 0.913584291934967, + "rewards_train/rejected": -1.2724088430404663, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -145.6266326904297, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -106.12794494628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5626633167266846, + "rewards_train/margins": 1.1001312732696533, + "rewards_train/rejected": -2.662794589996338, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -19.743267059326172, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -9.39161205291748, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5180767178535461, + "rewards_train/margins": 0.18045949935913086, + "rewards_train/rejected": -0.698536217212677, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -62.242454528808594, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -18.612972259521484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3492454290390015, + "rewards_train/margins": -0.22857320308685303, + "rewards_train/rejected": -1.1206722259521484, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.590044021606445, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -9.60120964050293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8433794379234314, + "rewards_train/margins": -0.4301334619522095, + "rewards_train/rejected": -0.4132459759712219, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -10.065239906311035, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -13.478157043457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3721489906311035, + "rewards_train/margins": 0.7287917137145996, + "rewards_train/rejected": -1.1009407043457031, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -101.49496459960938, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -209.25299072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.399496555328369, + "rewards_train/margins": 6.875802516937256, + "rewards_train/rejected": -9.275299072265625, + "step": 782 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.256569862365723, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -21.736215591430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43659448623657227, + "rewards_train/margins": 0.4807770848274231, + "rewards_train/rejected": -0.9173715710639954, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -26.048620223999023, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -22.95794105529785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3548620343208313, + "rewards_train/margins": 1.4503070712089539, + "rewards_train/rejected": -1.8051691055297852, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.437796592712402, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -19.38747215270996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043720342218875885, + "rewards_train/margins": 1.354342557489872, + "rewards_train/rejected": -1.310622215270996, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -36.5921630859375, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -17.971895217895508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8092163801193237, + "rewards_train/margins": -0.949526846408844, + "rewards_train/rejected": -0.8596895337104797, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -140.1563262939453, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -176.2477569580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.015632629394531, + "rewards_train/margins": 0.5091433525085449, + "rewards_train/rejected": -4.524775981903076, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -73.3185043334961, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -107.15072631835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23185043036937714, + "rewards_train/margins": 0.5332222133874893, + "rewards_train/rejected": -0.7650726437568665, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -106.22008514404297, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -31.308589935302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2720085084438324, + "rewards_train/margins": 1.9963504374027252, + "rewards_train/rejected": -2.2683589458465576, + "step": 783 + }, + { + "epoch": 0.22, + "logps_train/chosen": -6.266900062561035, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -28.814220428466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.292059987783432, + "rewards_train/margins": 2.6547320783138275, + "rewards_train/rejected": -2.3626720905303955, + "step": 783 + }, + { + "epoch": 0.22, + "learning_rate": 1.643519147704199e-06, + "loss": 0.4228, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -6.386549949645996, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -11.858634948730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2613449990749359, + "rewards_train/margins": 0.3534584939479828, + "rewards_train/rejected": -0.09211349487304688, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -70.19210815429688, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -87.52692413330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6442108154296875, + "rewards_train/margins": 0.033481597900390625, + "rewards_train/rejected": -3.677692413330078, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -15.506763458251953, + "logps_train/ref_chosen": -0.8125, + "logps_train/ref_rejected": -0.8125, + "logps_train/rejected": -15.640660285949707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4694263935089111, + "rewards_train/margins": 0.0133897066116333, + "rewards_train/rejected": -1.4828161001205444, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -55.48735809326172, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -146.53067016601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.051264192909002304, + "rewards_train/margins": 2.604331161826849, + "rewards_train/rejected": -2.5530669689178467, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -122.71307373046875, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -196.2847900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.571307420730591, + "rewards_train/margins": 3.6571714878082275, + "rewards_train/rejected": -7.228478908538818, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -152.02175903320312, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -171.82850646972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4021759033203125, + "rewards_train/margins": 1.7306747436523438, + "rewards_train/rejected": -6.132850646972656, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -243.39462280273438, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -238.17330932617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.8394622802734375, + "rewards_train/margins": 0.7778692245483398, + "rewards_train/rejected": -8.617331504821777, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -289.7232971191406, + "logps_train/ref_chosen": -196.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -116.92357635498047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.372329711914062, + "rewards_train/margins": -5.629971981048584, + "rewards_train/rejected": -3.7423577308654785, + "step": 784 + }, + { + "epoch": 0.22, + "logps_train/chosen": -35.13575744628906, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -63.31245422363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1385757476091385, + "rewards_train/margins": 2.5676696747541428, + "rewards_train/rejected": -2.7062454223632812, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -165.18405151367188, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -198.96548461914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.268405437469482, + "rewards_train/margins": 2.17814302444458, + "rewards_train/rejected": -7.4465484619140625, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.791662693023682, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -62.38917541503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10833372920751572, + "rewards_train/margins": 3.0472513660788536, + "rewards_train/rejected": -2.938917636871338, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.631721496582031, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -18.01323890686035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.220984697341919, + "rewards_train/margins": 0.22721421718597412, + "rewards_train/rejected": -1.448198914527893, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -9.931097030639648, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -20.39874267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4087347090244293, + "rewards_train/margins": 0.2811395823955536, + "rewards_train/rejected": -0.6898742914199829, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -65.39004516601562, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -3.1875, + "logps_train/rejected": -10.103038787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28900453448295593, + "rewards_train/margins": 0.4025493562221527, + "rewards_train/rejected": -0.6915538907051086, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.677899360656738, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -23.98482894897461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42403993010520935, + "rewards_train/margins": 0.39319297671318054, + "rewards_train/rejected": -0.8172329068183899, + "step": 785 + }, + { + "epoch": 0.22, + "logps_train/chosen": -10.954056739807129, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -33.66145706176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08290567249059677, + "rewards_train/margins": 2.561364986002445, + "rewards_train/rejected": -2.644270658493042, + "step": 785 + }, + { + "epoch": 0.22, + "learning_rate": 1.6414919134089823e-06, + "loss": 0.6654, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -30.20220947265625, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -1.5234375, + "logps_train/rejected": -14.096896171569824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2827209532260895, + "rewards_train/margins": 0.9746249616146088, + "rewards_train/rejected": -1.2573459148406982, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -198.52333068847656, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -200.2701416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9523330926895142, + "rewards_train/margins": 0.17468106746673584, + "rewards_train/rejected": -1.12701416015625, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -13.215291976928711, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -51.19847106933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.515279233455658, + "rewards_train/margins": 1.5295680165290833, + "rewards_train/rejected": -2.044847249984741, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -17.759384155273438, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -13.451891899108887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3009384274482727, + "rewards_train/margins": 0.21300077438354492, + "rewards_train/rejected": -0.5139392018318176, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -149.459716796875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -221.05296325683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.645971655845642, + "rewards_train/margins": 2.559324860572815, + "rewards_train/rejected": -4.205296516418457, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -11.469159126281738, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -28.40178108215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5594159364700317, + "rewards_train/margins": 0.699512243270874, + "rewards_train/rejected": -1.2589281797409058, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -204.15762329101562, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -200.44407653808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.665762424468994, + "rewards_train/margins": 0.1286454200744629, + "rewards_train/rejected": -7.794407844543457, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -122.23841094970703, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -140.35342407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.173841118812561, + "rewards_train/margins": 1.761501431465149, + "rewards_train/rejected": -2.93534255027771, + "step": 786 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.487957000732422, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -23.65809440612793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2956707179546356, + "rewards_train/margins": 0.2201387584209442, + "rewards_train/rejected": -0.5158094763755798, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -166.0906219482422, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -185.75628662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6090621948242188, + "rewards_train/margins": 1.266566514968872, + "rewards_train/rejected": -3.875628709793091, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -142.153076171875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -212.57666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5653076171875, + "rewards_train/margins": 8.242358207702637, + "rewards_train/rejected": -10.807665824890137, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -142.028076171875, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -133.03985595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5028076171875, + "rewards_train/margins": 1.4011781215667725, + "rewards_train/rejected": -2.9039857387542725, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -136.47190856933594, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -187.4095458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3471908569335938, + "rewards_train/margins": 4.7437639236450195, + "rewards_train/rejected": -6.090954780578613, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -140.13882446289062, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -207.8214111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6138824820518494, + "rewards_train/margins": 8.66825920343399, + "rewards_train/rejected": -9.28214168548584, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -15.764683723449707, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -33.005104064941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6889683604240417, + "rewards_train/margins": -0.10095793008804321, + "rewards_train/rejected": -0.5880104303359985, + "step": 787 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.088628768920898, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -2.6228108406066895, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5682379007339478, + "rewards_train/margins": -0.5090818144381046, + "rewards_train/rejected": -0.059156086295843124, + "step": 787 + }, + { + "epoch": 0.22, + "learning_rate": 1.6394601893505078e-06, + "loss": 0.3612, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -107.10832977294922, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -231.0, + "logps_train/rejected": -275.4271545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7608329653739929, + "rewards_train/margins": 3.681882679462433, + "rewards_train/rejected": -4.442715644836426, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -23.05226707458496, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -9.205493927001953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9364767074584961, + "rewards_train/margins": -0.44717732071876526, + "rewards_train/rejected": -0.48929938673973083, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -5.8310866355896, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -14.591752052307129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09248366206884384, + "rewards_train/margins": 0.39794153720140457, + "rewards_train/rejected": -0.4904251992702484, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -103.4395751953125, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -67.78131103515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4439575672149658, + "rewards_train/margins": -0.4158264398574829, + "rewards_train/rejected": -1.028131127357483, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -52.1501579284668, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -44.47165298461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0400158166885376, + "rewards_train/margins": 1.4821494817733765, + "rewards_train/rejected": -2.522165298461914, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -63.46305847167969, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -17.566247940063477, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0963058471679688, + "rewards_train/margins": -0.4396810531616211, + "rewards_train/rejected": -0.6566247940063477, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -16.5931453704834, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -13.605095863342285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8780645728111267, + "rewards_train/margins": -0.23630499839782715, + "rewards_train/rejected": -0.6417595744132996, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -4.887980937957764, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -24.878131866455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09192309528589249, + "rewards_train/margins": 0.9396400675177574, + "rewards_train/rejected": -1.03156316280365, + "step": 788 + }, + { + "epoch": 0.22, + "logps_train/chosen": -67.58147430419922, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -139.4736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8331474661827087, + "rewards_train/margins": 2.5142157673835754, + "rewards_train/rejected": -3.347363233566284, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -171.053955078125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -213.19979858398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.205395460128784, + "rewards_train/margins": 3.814584493637085, + "rewards_train/rejected": -7.019979953765869, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -170.25250244140625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -219.06121826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.925250291824341, + "rewards_train/margins": 4.380871534347534, + "rewards_train/rejected": -8.306121826171875, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -196.5186004638672, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -234.16651916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.701859951019287, + "rewards_train/margins": 1.7147917747497559, + "rewards_train/rejected": -9.416651725769043, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -19.676429748535156, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -77.02207946777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09264297783374786, + "rewards_train/margins": 0.5095649808645248, + "rewards_train/rejected": -0.6022079586982727, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -1.828921914100647, + "logps_train/ref_chosen": -0.87890625, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -16.466747283935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09500157088041306, + "rewards_train/margins": 0.8672981932759285, + "rewards_train/rejected": -0.9622997641563416, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -4.649569034576416, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -13.0479097366333, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33136317133903503, + "rewards_train/margins": 0.02967780828475952, + "rewards_train/rejected": -0.36104097962379456, + "step": 789 + }, + { + "epoch": 0.22, + "logps_train/chosen": -137.74269104003906, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -207.0646209716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3742692470550537, + "rewards_train/margins": 6.632192850112915, + "rewards_train/rejected": -10.006462097167969, + "step": 789 + }, + { + "epoch": 0.22, + "learning_rate": 1.6374239897486897e-06, + "loss": 0.4047, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -123.4300537109375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -185.853759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.693005383014679, + "rewards_train/margins": 2.4923705458641052, + "rewards_train/rejected": -3.185375928878784, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -66.7707748413086, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -57.4056282043457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8270775079727173, + "rewards_train/margins": -1.061514675617218, + "rewards_train/rejected": -0.7655628323554993, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -0.551988959312439, + "logps_train/ref_chosen": -0.173828125, + "logps_train/ref_rejected": -0.173828125, + "logps_train/rejected": -0.5507227778434753, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.037816084921360016, + "rewards_train/margins": -0.000126618891954422, + "rewards_train/rejected": -0.037689466029405594, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -76.59076690673828, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -24.815608978271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2090766876935959, + "rewards_train/margins": 0.4349842220544815, + "rewards_train/rejected": -0.6440609097480774, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -99.55958557128906, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -77.58619689941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8559585809707642, + "rewards_train/margins": -0.6973388195037842, + "rewards_train/rejected": -1.15861976146698, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -5.9335713386535645, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -6.260314464569092, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17773213982582092, + "rewards_train/margins": -0.004825696349143982, + "rewards_train/rejected": -0.17290644347667694, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -60.93402862548828, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -93.38752746582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.818402886390686, + "rewards_train/margins": 1.470349907875061, + "rewards_train/rejected": -2.288752794265747, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -158.3872528076172, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -154.0172576904297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5387252569198608, + "rewards_train/margins": -0.23699951171875, + "rewards_train/rejected": -1.3017257452011108, + "step": 790 + }, + { + "epoch": 0.22, + "logps_train/chosen": -8.692830085754395, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -23.779279708862305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25678300857543945, + "rewards_train/margins": 1.014894962310791, + "rewards_train/rejected": -1.2716779708862305, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -22.450132369995117, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -21.076108932495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6075132489204407, + "rewards_train/margins": 0.056347668170928955, + "rewards_train/rejected": -0.6638609170913696, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -143.4124755859375, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -176.25399780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.941247582435608, + "rewards_train/margins": 3.6841524839401245, + "rewards_train/rejected": -5.625400066375732, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -13.739977836608887, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -16.450265884399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46149778366088867, + "rewards_train/margins": 0.5304037928581238, + "rewards_train/rejected": -0.9919015765190125, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -161.69873046875, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -153.99917602539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.819873094558716, + "rewards_train/margins": -0.5699553489685059, + "rewards_train/rejected": -3.24991774559021, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -195.79554748535156, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -201.13711547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.1795549392700195, + "rewards_train/margins": 0.9341568946838379, + "rewards_train/rejected": -6.113711833953857, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.517486572265625, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -31.789289474487305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3704986572265625, + "rewards_train/margins": 0.408430278301239, + "rewards_train/rejected": -0.7789289355278015, + "step": 791 + }, + { + "epoch": 0.22, + "logps_train/chosen": -3.848144292831421, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -23.227312088012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08393557369709015, + "rewards_train/margins": 0.5941667705774307, + "rewards_train/rejected": -0.5102311968803406, + "step": 791 + }, + { + "epoch": 0.22, + "learning_rate": 1.6353833288547663e-06, + "loss": 0.5762, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -5.299760818481445, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -22.010120391845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03935108333826065, + "rewards_train/margins": 1.0054109320044518, + "rewards_train/rejected": -1.0447620153427124, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -1.8334578275680542, + "logps_train/ref_chosen": -1.0234375, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -15.427409172058105, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08100203424692154, + "rewards_train/margins": 0.786738894879818, + "rewards_train/rejected": -0.8677409291267395, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -62.88020706176758, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -157.2600860595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3880207538604736, + "rewards_train/margins": 2.7879879474639893, + "rewards_train/rejected": -4.176008701324463, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -10.82545280456543, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -15.032819747924805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24504528939723969, + "rewards_train/margins": 0.28323669731616974, + "rewards_train/rejected": -0.5282819867134094, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -201.748046875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -209.6885986328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.524805068969727, + "rewards_train/margins": -0.3559446334838867, + "rewards_train/rejected": -8.16886043548584, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -3.2066943645477295, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -17.38257598876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15120556950569153, + "rewards_train/margins": 1.2800882160663605, + "rewards_train/rejected": -1.128882646560669, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -55.528419494628906, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -70.51225280761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47784194350242615, + "rewards_train/margins": 0.6233833134174347, + "rewards_train/rejected": -1.1012252569198608, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -15.91024112701416, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -21.516368865966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11602411419153214, + "rewards_train/margins": 1.2481128200888634, + "rewards_train/rejected": -1.3641369342803955, + "step": 792 + }, + { + "epoch": 0.22, + "logps_train/chosen": -118.42839050292969, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -116.42523193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4928390979766846, + "rewards_train/margins": 0.34968411922454834, + "rewards_train/rejected": -1.842523217201233, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -124.28363037109375, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -112.76760864257812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9283630847930908, + "rewards_train/margins": -0.8016022443771362, + "rewards_train/rejected": -1.1267608404159546, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -51.37791442871094, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -145.847900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06220855936408043, + "rewards_train/margins": 5.546998884528875, + "rewards_train/rejected": -5.484790325164795, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -132.53854370117188, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -177.14096069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4038543701171875, + "rewards_train/margins": 4.410241603851318, + "rewards_train/rejected": -5.814095973968506, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.284425258636475, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -18.17852783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0028074742294847965, + "rewards_train/margins": 0.3081602514721453, + "rewards_train/rejected": -0.3053527772426605, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -104.89788055419922, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -258.5029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.289788007736206, + "rewards_train/margins": 12.96050477027893, + "rewards_train/rejected": -15.250292778015137, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -173.65626525878906, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -207.38540649414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6656265258789062, + "rewards_train/margins": 3.1729140281677246, + "rewards_train/rejected": -6.838540554046631, + "step": 793 + }, + { + "epoch": 0.22, + "logps_train/chosen": -202.02516174316406, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -211.13323974609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.202516078948975, + "rewards_train/margins": 0.8108081817626953, + "rewards_train/rejected": -6.01332426071167, + "step": 793 + }, + { + "epoch": 0.22, + "learning_rate": 1.6333382209512e-06, + "loss": 0.3627, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -43.46596908569336, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -51.95507049560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6715969443321228, + "rewards_train/margins": 0.373910129070282, + "rewards_train/rejected": -1.0455070734024048, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -22.99030303955078, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -20.805458068847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5521553754806519, + "rewards_train/margins": -0.9778595566749573, + "rewards_train/rejected": -0.5742958188056946, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -30.66600799560547, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -19.575443267822266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1041008234024048, + "rewards_train/margins": -0.027806520462036133, + "rewards_train/rejected": -1.0762943029403687, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -6.616048336029053, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -22.745332717895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3133951723575592, + "rewards_train/margins": 0.96292844414711, + "rewards_train/rejected": -0.6495332717895508, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -4.516218662261963, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -13.171239852905273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22037187218666077, + "rewards_train/margins": 0.1280021071434021, + "rewards_train/rejected": -0.34837397933006287, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -197.54483032226562, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -210.29139709472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.054482936859131, + "rewards_train/margins": 2.5246567726135254, + "rewards_train/rejected": -8.579139709472656, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -97.47516632080078, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -110.63104248046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0475165843963623, + "rewards_train/margins": -0.5344123840332031, + "rewards_train/rejected": -2.513104200363159, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -26.426809310913086, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -16.40654754638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1551809310913086, + "rewards_train/margins": 0.9073488712310791, + "rewards_train/rejected": -1.0625298023223877, + "step": 794 + }, + { + "epoch": 0.22, + "logps_train/chosen": -89.15325927734375, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -74.06359100341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.665325939655304, + "rewards_train/margins": 0.5160332322120667, + "rewards_train/rejected": -1.1813591718673706, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -25.62640380859375, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -35.362091064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2188904285430908, + "rewards_train/margins": 0.3923187255859375, + "rewards_train/rejected": -1.6112091541290283, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -65.54545593261719, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -64.28587341308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8545455932617188, + "rewards_train/margins": -0.6009582579135895, + "rewards_train/rejected": -0.2535873353481293, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -118.83631896972656, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -169.87820434570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.883631944656372, + "rewards_train/margins": 2.204188585281372, + "rewards_train/rejected": -5.087820529937744, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -37.032554626464844, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -84.32272338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7282554507255554, + "rewards_train/margins": 1.3540168404579163, + "rewards_train/rejected": -2.0822722911834717, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -100.31147766113281, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -196.78369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.281147837638855, + "rewards_train/margins": 7.947221875190735, + "rewards_train/rejected": -9.22836971282959, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -119.21466064453125, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -190.05654907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.271466016769409, + "rewards_train/margins": 4.484188795089722, + "rewards_train/rejected": -7.755654811859131, + "step": 795 + }, + { + "epoch": 0.22, + "logps_train/chosen": -4.522360324859619, + "logps_train/ref_chosen": -1.53125, + "logps_train/ref_rejected": -0.859375, + "logps_train/rejected": -3.5523762702941895, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2991110384464264, + "rewards_train/margins": -0.02981090545654297, + "rewards_train/rejected": -0.2693001329898834, + "step": 795 + }, + { + "epoch": 0.22, + "learning_rate": 1.6312886803515784e-06, + "loss": 0.4981, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -47.801719665527344, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -48.81064987182617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4301719665527344, + "rewards_train/margins": 0.07589316368103027, + "rewards_train/rejected": -2.5060651302337646, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.8371734619140625, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -34.59239196777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2587173581123352, + "rewards_train/margins": 1.6505218148231506, + "rewards_train/rejected": -1.9092391729354858, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -23.42850685119629, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -9.576072692871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25535067915916443, + "rewards_train/margins": 0.23975658416748047, + "rewards_train/rejected": -0.4951072633266449, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -8.08323860168457, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -28.120485305786133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18957386910915375, + "rewards_train/margins": 0.9349746853113174, + "rewards_train/rejected": -1.1245485544204712, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -209.5610809326172, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -221.18218994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1561081409454346, + "rewards_train/margins": 1.4621107578277588, + "rewards_train/rejected": -4.618218898773193, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -51.22507858276367, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -25.464412689208984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.522507905960083, + "rewards_train/margins": -0.47606658935546875, + "rewards_train/rejected": -1.0464413166046143, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -40.01869583129883, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -4.502256870269775, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7018696069717407, + "rewards_train/margins": -0.7860189229249954, + "rewards_train/rejected": 0.0841493159532547, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -19.81163215637207, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -5.734731197357178, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7124132513999939, + "rewards_train/margins": -0.5248776227235794, + "rewards_train/rejected": -0.1875356286764145, + "step": 796 + }, + { + "epoch": 0.22, + "logps_train/chosen": -148.4376678466797, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -175.8463592529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3937668800354004, + "rewards_train/margins": 4.790869235992432, + "rewards_train/rejected": -7.184636116027832, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -66.56599426269531, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -103.96697998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5315994024276733, + "rewards_train/margins": 1.2650986909866333, + "rewards_train/rejected": -2.7966980934143066, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -170.0443878173828, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -130.5906219482422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.054439067840576, + "rewards_train/margins": -2.8953769207000732, + "rewards_train/rejected": -3.159062147140503, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -70.14087677001953, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -81.9898681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.66408771276474, + "rewards_train/margins": 0.7848991751670837, + "rewards_train/rejected": -1.4489868879318237, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -181.4384765625, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -109.10035705566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.543847560882568, + "rewards_train/margins": -1.683811902999878, + "rewards_train/rejected": -2.8600356578826904, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -199.13494873046875, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -236.74578857421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.163495063781738, + "rewards_train/margins": -1.4889159202575684, + "rewards_train/rejected": -6.67457914352417, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -104.98837280273438, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -58.92975616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6988372802734375, + "rewards_train/margins": 0.6441383361816406, + "rewards_train/rejected": -2.342975616455078, + "step": 797 + }, + { + "epoch": 0.22, + "logps_train/chosen": -32.902671813964844, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -20.61916732788086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6652672290802002, + "rewards_train/margins": -0.5408505201339722, + "rewards_train/rejected": -1.124416708946228, + "step": 797 + }, + { + "epoch": 0.22, + "learning_rate": 1.6292347214005124e-06, + "loss": 0.8509, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -127.95675659179688, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -156.7767333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8456757068634033, + "rewards_train/margins": 1.2819979190826416, + "rewards_train/rejected": -4.127673625946045, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -8.57668399810791, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -8.162129402160645, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.513918399810791, + "rewards_train/margins": 0.0038570761680603027, + "rewards_train/rejected": -0.5177754759788513, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -2.486541986465454, + "logps_train/ref_chosen": -0.57421875, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -4.632962226867676, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1912323236465454, + "rewards_train/margins": -0.31543610244989395, + "rewards_train/rejected": 0.12420377880334854, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -235.3985595703125, + "logps_train/ref_chosen": -220.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -198.28280639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.53985595703125, + "rewards_train/margins": 1.688424825668335, + "rewards_train/rejected": -3.228280782699585, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -14.300834655761719, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -63.61878967285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1425834745168686, + "rewards_train/margins": 2.1192954927682877, + "rewards_train/rejected": -2.2618789672851562, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -186.46844482421875, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -200.82154846191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.49684476852417, + "rewards_train/margins": 1.9853100776672363, + "rewards_train/rejected": -8.482154846191406, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -93.13369750976562, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -104.08668518066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8133697509765625, + "rewards_train/margins": 0.6952989101409912, + "rewards_train/rejected": -2.5086686611175537, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.472407817840576, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -12.13265609741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2816157937049866, + "rewards_train/margins": 0.2972748279571533, + "rewards_train/rejected": -0.5788906216621399, + "step": 798 + }, + { + "epoch": 0.22, + "logps_train/chosen": -158.6337127685547, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -166.63095092773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4133713245391846, + "rewards_train/margins": 1.7997238636016846, + "rewards_train/rejected": -5.213095188140869, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -192.22067260742188, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -217.96005249023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.922067165374756, + "rewards_train/margins": 0.5739383697509766, + "rewards_train/rejected": -5.496005535125732, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -68.68140411376953, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -87.59002685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26814040541648865, + "rewards_train/margins": 1.4908622801303864, + "rewards_train/rejected": -1.759002685546875, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -97.00521850585938, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -184.85574340820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7505218982696533, + "rewards_train/margins": 5.9850523471832275, + "rewards_train/rejected": -7.735574245452881, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -22.347946166992188, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -27.740724563598633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1597946137189865, + "rewards_train/margins": 1.2330278903245926, + "rewards_train/rejected": -1.392822504043579, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -231.44677734375, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -278.0, + "logps_train/rejected": -327.83544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.244678020477295, + "rewards_train/margins": 0.7388668060302734, + "rewards_train/rejected": -4.983544826507568, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -117.62734985351562, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -172.1408233642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4627350568771362, + "rewards_train/margins": 4.701347470283508, + "rewards_train/rejected": -6.1640825271606445, + "step": 799 + }, + { + "epoch": 0.22, + "logps_train/chosen": -8.11037826538086, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -45.45945739746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007712173741310835, + "rewards_train/margins": 2.4411578658036888, + "rewards_train/rejected": -2.433445692062378, + "step": 799 + }, + { + "epoch": 0.22, + "learning_rate": 1.627176358473537e-06, + "loss": 0.2948, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -35.754295349121094, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -10.08060359954834, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7504295706748962, + "rewards_train/margins": -0.10174417495727539, + "rewards_train/rejected": -0.6486853957176208, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -140.43011474609375, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -191.50564575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7930115461349487, + "rewards_train/margins": 4.757553219795227, + "rewards_train/rejected": -6.550564765930176, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -11.884279251098633, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -12.33736801147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18657207489013672, + "rewards_train/margins": 0.8421838879585266, + "rewards_train/rejected": -0.6556118130683899, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.354377746582031, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -22.168916702270508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2041877806186676, + "rewards_train/margins": 0.70020392537117, + "rewards_train/rejected": -0.9043917059898376, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -123.82769775390625, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -214.0497589111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9827698469161987, + "rewards_train/margins": 7.872205853462219, + "rewards_train/rejected": -9.854975700378418, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -77.95399475097656, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -128.1078643798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5453994870185852, + "rewards_train/margins": 5.315386950969696, + "rewards_train/rejected": -5.860786437988281, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -43.44485855102539, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -124.98260498046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.869485855102539, + "rewards_train/margins": 1.8287746906280518, + "rewards_train/rejected": -3.698260545730591, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -12.363958358764648, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -130.52711486816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8551458716392517, + "rewards_train/margins": 5.3975659012794495, + "rewards_train/rejected": -6.252711772918701, + "step": 800 + }, + { + "epoch": 0.22, + "logps_train/chosen": -36.60324478149414, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -23.494508743286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.197824478149414, + "rewards_train/margins": 0.11412644386291504, + "rewards_train/rejected": -1.311950922012329, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -9.681300163269043, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -5.53354024887085, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5368800163269043, + "rewards_train/margins": -0.2929009944200516, + "rewards_train/rejected": -0.24397902190685272, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -180.31332397460938, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -242.15109252929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.831332683563232, + "rewards_train/margins": 2.983776569366455, + "rewards_train/rejected": -9.815109252929688, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -95.83282470703125, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -154.79214477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1332825422286987, + "rewards_train/margins": 1.7459319829940796, + "rewards_train/rejected": -2.8792145252227783, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -133.2549591064453, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -108.51001739501953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.025496006011963, + "rewards_train/margins": -0.6744942665100098, + "rewards_train/rejected": -1.3510017395019531, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -6.6181840896606445, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -8.81351089477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1944315880537033, + "rewards_train/margins": 0.6257826834917068, + "rewards_train/rejected": -0.43135109543800354, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -6.047068119049072, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -28.580167770385742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07970681041479111, + "rewards_train/margins": 1.6220600381493568, + "rewards_train/rejected": -1.701766848564148, + "step": 801 + }, + { + "epoch": 0.22, + "logps_train/chosen": -81.81256103515625, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -48.32322692871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.231256127357483, + "rewards_train/margins": 0.07606661319732666, + "rewards_train/rejected": -1.3073227405548096, + "step": 801 + }, + { + "epoch": 0.22, + "learning_rate": 1.6251136059770102e-06, + "loss": 0.3578, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -134.19354248046875, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -249.20196533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2193543910980225, + "rewards_train/margins": 8.600842714309692, + "rewards_train/rejected": -10.820197105407715, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -4.785237789154053, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -6.026439666748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.153523787856102, + "rewards_train/margins": 0.21630768477916718, + "rewards_train/rejected": -0.36983147263526917, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -158.6671905517578, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -140.1573486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5667190551757812, + "rewards_train/margins": 1.9990158081054688, + "rewards_train/rejected": -4.56573486328125, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -134.13209533691406, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -137.76779174804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5632095336914062, + "rewards_train/margins": 1.113569736480713, + "rewards_train/rejected": -4.676779270172119, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -194.10659790039062, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -131.32220458984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2106597423553467, + "rewards_train/margins": -0.2284393310546875, + "rewards_train/rejected": -2.982220411300659, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -134.0864715576172, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -138.79957580566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.70864725112915, + "rewards_train/margins": 0.3713102340698242, + "rewards_train/rejected": -5.079957485198975, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -186.58682250976562, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -135.85317993164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.058682441711426, + "rewards_train/margins": -3.2233643531799316, + "rewards_train/rejected": -2.835318088531494, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -90.49465942382812, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -91.99295806884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6494659185409546, + "rewards_train/margins": 0.14982986450195312, + "rewards_train/rejected": -1.7992957830429077, + "step": 802 + }, + { + "epoch": 0.22, + "logps_train/chosen": -87.37519073486328, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -145.68374633789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18751907348632812, + "rewards_train/margins": 4.580855846405029, + "rewards_train/rejected": -4.768374919891357, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -3.97110915184021, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -52.212528228759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19351409375667572, + "rewards_train/margins": 4.283517107367516, + "rewards_train/rejected": -4.09000301361084, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -104.28614807128906, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -175.6006317138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.27861487865448, + "rewards_train/margins": 3.6814485788345337, + "rewards_train/rejected": -4.960063457489014, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -13.270146369934082, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -29.56232452392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2479853630065918, + "rewards_train/margins": 1.5917178392410278, + "rewards_train/rejected": -1.343732476234436, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -123.71361541748047, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -201.04434204101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3713616132736206, + "rewards_train/margins": 5.63307249546051, + "rewards_train/rejected": -7.004434108734131, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -142.95729064941406, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -120.59172821044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.1957292556762695, + "rewards_train/margins": -3.436556339263916, + "rewards_train/rejected": -2.7591729164123535, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -15.624906539916992, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -42.64421081542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8781156539916992, + "rewards_train/margins": 1.3738055229187012, + "rewards_train/rejected": -2.2519211769104004, + "step": 803 + }, + { + "epoch": 0.22, + "logps_train/chosen": -162.89382934570312, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -112.09479522705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1893829107284546, + "rewards_train/margins": 1.0700966119766235, + "rewards_train/rejected": -2.259479522705078, + "step": 803 + }, + { + "epoch": 0.22, + "learning_rate": 1.6230464783480126e-06, + "loss": 0.6531, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -2.368222951889038, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -6.895910263061523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05010354518890381, + "rewards_train/margins": -0.17926251888275146, + "rewards_train/rejected": 0.12915897369384766, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -7.862874507904053, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -10.636449813842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2269124537706375, + "rewards_train/margins": 0.03673253953456879, + "rewards_train/rejected": -0.2636449933052063, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -129.97976684570312, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -205.92869567871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2979767322540283, + "rewards_train/margins": 1.9948928356170654, + "rewards_train/rejected": -5.292869567871094, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -8.39303970336914, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -25.389432907104492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3518039882183075, + "rewards_train/margins": 0.3371393382549286, + "rewards_train/rejected": -0.6889433264732361, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -13.942342758178711, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -29.001850128173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06201572343707085, + "rewards_train/margins": 1.2872007600963116, + "rewards_train/rejected": -1.2251850366592407, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -57.36796951293945, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -54.89202117919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7117969393730164, + "rewards_train/margins": 2.4274051785469055, + "rewards_train/rejected": -3.139202117919922, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -37.39939498901367, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -14.488003730773926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8649395108222961, + "rewards_train/margins": 0.20729833841323853, + "rewards_train/rejected": -1.0722378492355347, + "step": 804 + }, + { + "epoch": 0.22, + "logps_train/chosen": -124.45079040527344, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -101.47566223144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4950790405273438, + "rewards_train/margins": 0.0024871826171875, + "rewards_train/rejected": -1.4975662231445312, + "step": 804 + }, + { + "epoch": 0.23, + "logps_train/chosen": -116.85382843017578, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -174.65313720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2353829145431519, + "rewards_train/margins": 6.329930901527405, + "rewards_train/rejected": -7.565313816070557, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -52.48400115966797, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -95.8505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 1.001599907875061, + "rewards_train/margins": 4.961658596992493, + "rewards_train/rejected": -3.9600586891174316, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -64.47914123535156, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -64.5138168334961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2520858943462372, + "rewards_train/margins": 0.003467574715614319, + "rewards_train/rejected": 0.24861831963062286, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -179.04537963867188, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -191.40838623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.504538059234619, + "rewards_train/margins": 1.9363007545471191, + "rewards_train/rejected": -4.440838813781738, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -19.44927406311035, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -32.56531524658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23242740333080292, + "rewards_train/margins": 0.6241041570901871, + "rewards_train/rejected": -0.85653156042099, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -9.735811233520508, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -20.23910140991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33295613527297974, + "rewards_train/margins": 0.24095404148101807, + "rewards_train/rejected": -0.5739101767539978, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -105.5372314453125, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -140.65513610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.50372314453125, + "rewards_train/margins": 1.861790418624878, + "rewards_train/rejected": -2.365513563156128, + "step": 805 + }, + { + "epoch": 0.23, + "logps_train/chosen": -36.43436050415039, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -28.52730369567871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3184360265731812, + "rewards_train/margins": 0.615544319152832, + "rewards_train/rejected": -1.9339803457260132, + "step": 805 + }, + { + "epoch": 0.23, + "learning_rate": 1.620974990054246e-06, + "loss": 0.3852, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -1.758892297744751, + "logps_train/ref_chosen": -1.140625, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -4.124157428741455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06182673200964928, + "rewards_train/margins": 0.014651510864496231, + "rewards_train/rejected": -0.07647824287414551, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -4.714755058288574, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -25.657299041748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08477449417114258, + "rewards_train/margins": 1.916129469871521, + "rewards_train/rejected": -1.8313549757003784, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -5.860818862915039, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -57.67463302612305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.33891811966896057, + "rewards_train/margins": 3.218881517648697, + "rewards_train/rejected": -2.8799633979797363, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -2.676138162612915, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -5.2866926193237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.37926119565963745, + "rewards_train/margins": 0.517305463552475, + "rewards_train/rejected": -0.13804426789283752, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -93.70247650146484, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -191.58399963378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5202476978302, + "rewards_train/margins": 6.888152837753296, + "rewards_train/rejected": -9.408400535583496, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -90.87065887451172, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -151.21865844726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43706589937210083, + "rewards_train/margins": 0.7847999930381775, + "rewards_train/rejected": -1.2218658924102783, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -185.30471801757812, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -246.71441650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.5304718017578125, + "rewards_train/margins": 5.440970420837402, + "rewards_train/rejected": -10.971442222595215, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -28.981544494628906, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -37.31062316894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2575294971466064, + "rewards_train/margins": 0.09853291511535645, + "rewards_train/rejected": -2.356062412261963, + "step": 806 + }, + { + "epoch": 0.23, + "logps_train/chosen": -10.449400901794434, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -57.835365295410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6793150901794434, + "rewards_train/margins": 1.2792214155197144, + "rewards_train/rejected": -1.9585365056991577, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -46.006412506103516, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -140.34097290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04935875162482262, + "rewards_train/margins": 4.533456232398748, + "rewards_train/rejected": -4.484097480773926, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -176.93231201171875, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -278.06451416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.843231201171875, + "rewards_train/margins": 1.9632205963134766, + "rewards_train/rejected": -8.806451797485352, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -6.863170146942139, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -10.395333290100098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030067015439271927, + "rewards_train/margins": 0.7141538374125957, + "rewards_train/rejected": -0.7442208528518677, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -136.9416046142578, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -136.90277099609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9441604614257812, + "rewards_train/margins": -0.3538832664489746, + "rewards_train/rejected": -2.5902771949768066, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -157.33868408203125, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -166.7035675048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.433868408203125, + "rewards_train/margins": -0.26351165771484375, + "rewards_train/rejected": -2.1703567504882812, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -98.20218658447266, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -207.48382568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.445218801498413, + "rewards_train/margins": 5.953164339065552, + "rewards_train/rejected": -9.398383140563965, + "step": 807 + }, + { + "epoch": 0.23, + "logps_train/chosen": -132.73077392578125, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -144.30209350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.473077297210693, + "rewards_train/margins": 0.4071321487426758, + "rewards_train/rejected": -5.880209445953369, + "step": 807 + }, + { + "epoch": 0.23, + "learning_rate": 1.6188991555939317e-06, + "loss": 0.3358, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -96.16714477539062, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -100.70072937011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9167144894599915, + "rewards_train/margins": 0.8033584952354431, + "rewards_train/rejected": -1.7200729846954346, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -93.71379089355469, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -111.25798034667969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3713791072368622, + "rewards_train/margins": -0.14558106660842896, + "rewards_train/rejected": -0.22579804062843323, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -45.58247756958008, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -29.62965202331543, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3707478046417236, + "rewards_train/margins": -0.42028260231018066, + "rewards_train/rejected": -1.950465202331543, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -17.428607940673828, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -25.544723510742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0071392059326171875, + "rewards_train/margins": 0.011611557099968195, + "rewards_train/rejected": -0.0044723511673510075, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -48.58052062988281, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -108.52464294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43305206298828125, + "rewards_train/margins": 3.869412422180176, + "rewards_train/rejected": -4.302464485168457, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -71.59283447265625, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -80.28543853759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.859283447265625, + "rewards_train/margins": 1.869260549545288, + "rewards_train/rejected": -2.728543996810913, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -40.900535583496094, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -52.75, + "logps_train/rejected": -98.68661499023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5400536060333252, + "rewards_train/margins": 3.0536081790924072, + "rewards_train/rejected": -4.593661785125732, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -200.2911376953125, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -261.9286193847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.629113674163818, + "rewards_train/margins": 4.163748264312744, + "rewards_train/rejected": -8.792861938476562, + "step": 808 + }, + { + "epoch": 0.23, + "logps_train/chosen": -15.525046348571777, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -9.114323616027832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.058754634112119675, + "rewards_train/margins": 0.1620527245104313, + "rewards_train/rejected": -0.22080735862255096, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -26.48194694519043, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -11.65510368347168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3481946885585785, + "rewards_train/margins": 0.1548157036304474, + "rewards_train/rejected": -0.5030103921890259, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -119.75547790527344, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -128.39219665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7755478024482727, + "rewards_train/margins": 1.5636720061302185, + "rewards_train/rejected": -2.339219808578491, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -65.9043960571289, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -36.41489028930664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5404396057128906, + "rewards_train/margins": -0.11145055294036865, + "rewards_train/rejected": -1.428989052772522, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -132.24063110351562, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -147.7993927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5240631103515625, + "rewards_train/margins": 0.8558762073516846, + "rewards_train/rejected": -2.379939317703247, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -18.303302764892578, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -34.54655075073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9178302884101868, + "rewards_train/margins": 0.6368247866630554, + "rewards_train/rejected": -1.5546550750732422, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -148.35833740234375, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -55.721397399902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.885833740234375, + "rewards_train/margins": -1.9136939644813538, + "rewards_train/rejected": -0.9721397757530212, + "step": 809 + }, + { + "epoch": 0.23, + "logps_train/chosen": -180.95416259765625, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -211.35430908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.09541654586792, + "rewards_train/margins": 3.440014362335205, + "rewards_train/rejected": -7.535430908203125, + "step": 809 + }, + { + "epoch": 0.23, + "learning_rate": 1.6168189894957109e-06, + "loss": 0.5009, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -39.95797348022461, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -114.07232666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3957973718643188, + "rewards_train/margins": 2.011435389518738, + "rewards_train/rejected": -3.4072327613830566, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -126.03195190429688, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -166.91998291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5531952381134033, + "rewards_train/margins": 3.0388033390045166, + "rewards_train/rejected": -5.59199857711792, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -27.755857467651367, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -25.63646697998047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9849607944488525, + "rewards_train/margins": -0.024439096450805664, + "rewards_train/rejected": -1.9605216979980469, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -36.762535095214844, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -126.07121276855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7762535214424133, + "rewards_train/margins": 1.5308677554130554, + "rewards_train/rejected": -2.3071212768554688, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -27.736244201660156, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -31.51234245300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2361243963241577, + "rewards_train/margins": 0.4526098966598511, + "rewards_train/rejected": -1.6887342929840088, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -90.61214447021484, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -37.98271179199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8112144470214844, + "rewards_train/margins": 1.099556803703308, + "rewards_train/rejected": -1.9107712507247925, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -91.09844207763672, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -34.99705123901367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14015579223632812, + "rewards_train/margins": 0.6898609399795532, + "rewards_train/rejected": -0.5497051477432251, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -72.89765930175781, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -123.75251770019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4602340757846832, + "rewards_train/margins": 0.6854858547449112, + "rewards_train/rejected": -0.22525177896022797, + "step": 810 + }, + { + "epoch": 0.23, + "logps_train/chosen": -2.2663021087646484, + "logps_train/ref_chosen": -1.4296875, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -1.3572543859481812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08366145938634872, + "rewards_train/margins": -0.24168602377176285, + "rewards_train/rejected": 0.15802456438541412, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -17.716922760009766, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -28.197433471679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8091922998428345, + "rewards_train/margins": 0.1980510950088501, + "rewards_train/rejected": -1.0072433948516846, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -9.134172439575195, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -17.695180892944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24283276498317719, + "rewards_train/margins": 1.1123508661985397, + "rewards_train/rejected": -0.8695181012153625, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -25.134273529052734, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -23.58379554748535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6696773767471313, + "rewards_train/margins": -0.46129775047302246, + "rewards_train/rejected": -1.2083796262741089, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -130.74935913085938, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -226.16268920898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6249359846115112, + "rewards_train/margins": 7.391332745552063, + "rewards_train/rejected": -9.016268730163574, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -23.27595329284668, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -61.72050857543945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3338453769683838, + "rewards_train/margins": 1.3882055282592773, + "rewards_train/rejected": -2.722050905227661, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -89.86650848388672, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -87.19712829589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4366508722305298, + "rewards_train/margins": -0.06693804264068604, + "rewards_train/rejected": -1.3697128295898438, + "step": 811 + }, + { + "epoch": 0.23, + "logps_train/chosen": -130.72650146484375, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -156.0517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.372650146484375, + "rewards_train/margins": 3.63252592086792, + "rewards_train/rejected": -5.005176067352295, + "step": 811 + }, + { + "epoch": 0.23, + "learning_rate": 1.6147345063185403e-06, + "loss": 0.3938, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -5.869978904724121, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -9.16064739227295, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10574789345264435, + "rewards_train/margins": 0.17906685173511505, + "rewards_train/rejected": -0.2848147451877594, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -7.579939842224121, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -7.9391069412231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08575601875782013, + "rewards_train/margins": 0.2015417143702507, + "rewards_train/rejected": -0.11578569561243057, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -25.812374114990234, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -17.141887664794922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7437373995780945, + "rewards_train/margins": -0.0732986330986023, + "rewards_train/rejected": -0.6704387664794922, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -6.904718399047852, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -20.984329223632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34984683990478516, + "rewards_train/margins": 0.4423360824584961, + "rewards_train/rejected": -0.7921829223632812, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -12.463528633117676, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -19.80207061767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6244778633117676, + "rewards_train/margins": 0.3369792103767395, + "rewards_train/rejected": -0.9614570736885071, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -44.75779724121094, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -22.434329986572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6257797479629517, + "rewards_train/margins": 0.4364032745361328, + "rewards_train/rejected": -1.0621830224990845, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -17.097497940063477, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -1.6796875, + "logps_train/rejected": -4.611293792724609, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2722497880458832, + "rewards_train/margins": 0.02091085910797119, + "rewards_train/rejected": -0.29316064715385437, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -17.774612426757812, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -94.01176452636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07746124267578125, + "rewards_train/margins": 0.4237152338027954, + "rewards_train/rejected": -0.5011764764785767, + "step": 812 + }, + { + "epoch": 0.23, + "logps_train/chosen": -10.77964973449707, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -8.075995445251465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19078503549098969, + "rewards_train/margins": 0.5640095919370651, + "rewards_train/rejected": -0.37322455644607544, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -22.93195915222168, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -44.99601745605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.668195903301239, + "rewards_train/margins": 0.031405866146087646, + "rewards_train/rejected": -0.6996017694473267, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -103.75618743896484, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -132.3113555908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2756187617778778, + "rewards_train/margins": 0.4555167853832245, + "rewards_train/rejected": -0.7311355471611023, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -103.9678955078125, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -104.29777526855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15321044623851776, + "rewards_train/margins": 0.03298797458410263, + "rewards_train/rejected": 0.12022247165441513, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -59.632728576660156, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -40.1336555480957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.48672714829444885, + "rewards_train/margins": 2.537592798471451, + "rewards_train/rejected": -2.050865650177002, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -14.861698150634766, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -9.981390953063965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26116982102394104, + "rewards_train/margins": 0.09946927428245544, + "rewards_train/rejected": -0.3606390953063965, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -0.6590994596481323, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -0.984375, + "logps_train/rejected": -0.6584495902061462, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.03252755478024483, + "rewards_train/margins": -6.498768925666809e-05, + "rewards_train/rejected": 0.032592542469501495, + "step": 813 + }, + { + "epoch": 0.23, + "logps_train/chosen": -9.759302139282227, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -28.966630935668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27280521392822266, + "rewards_train/margins": 1.2613579034805298, + "rewards_train/rejected": -1.5341631174087524, + "step": 813 + }, + { + "epoch": 0.23, + "learning_rate": 1.6126457206515918e-06, + "loss": 0.5381, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -10.199810981750488, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -14.809371948242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41998109221458435, + "rewards_train/margins": -0.13279390335083008, + "rewards_train/rejected": -0.2871871888637543, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -124.0594482421875, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -168.86068725585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2059448957443237, + "rewards_train/margins": 3.330124020576477, + "rewards_train/rejected": -4.536068916320801, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -123.93135833740234, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -202.32882690429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5931358337402344, + "rewards_train/margins": 3.939746856689453, + "rewards_train/rejected": -7.5328826904296875, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -99.37014770507812, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -127.57050323486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6870148181915283, + "rewards_train/margins": 1.6700356006622314, + "rewards_train/rejected": -3.3570504188537598, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -22.048587799072266, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -34.11441421508789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9048587679862976, + "rewards_train/margins": 0.4565827250480652, + "rewards_train/rejected": -1.3614414930343628, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -10.251313209533691, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -10.856771469116211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21888132393360138, + "rewards_train/margins": 0.3261708468198776, + "rewards_train/rejected": -0.545052170753479, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -188.0847930908203, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -202.34536743164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.208479404449463, + "rewards_train/margins": 2.0760579109191895, + "rewards_train/rejected": -9.284537315368652, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -68.53048706054688, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -183.40805053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.353048712015152, + "rewards_train/margins": 3.9877563416957855, + "rewards_train/rejected": -4.3408050537109375, + "step": 814 + }, + { + "epoch": 0.23, + "logps_train/chosen": -41.48814392089844, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -60.01993942260742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.148814395070076, + "rewards_train/margins": 0.3281795531511307, + "rewards_train/rejected": -0.47699394822120667, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -148.33009338378906, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -224.0, + "logps_train/rejected": -343.94537353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6330093145370483, + "rewards_train/margins": 10.361528038978577, + "rewards_train/rejected": -11.994537353515625, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -47.40070724487305, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -75.89816284179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.8599292635917664, + "rewards_train/margins": 1.9997455477714539, + "rewards_train/rejected": -1.1398162841796875, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -49.48800277709961, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -25.756595611572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7238003015518188, + "rewards_train/margins": 0.8643592596054077, + "rewards_train/rejected": -1.5881595611572266, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -23.217620849609375, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -44.7340202331543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22176209092140198, + "rewards_train/margins": 1.2391400039196014, + "rewards_train/rejected": -1.4609020948410034, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -18.389291763305664, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -13.122801780700684, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1233042478561401, + "rewards_train/margins": -0.22664904594421387, + "rewards_train/rejected": -0.8966552019119263, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -150.0066375732422, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -172.44313049316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.300663709640503, + "rewards_train/margins": 2.1436493396759033, + "rewards_train/rejected": -4.444313049316406, + "step": 815 + }, + { + "epoch": 0.23, + "logps_train/chosen": -37.141334533691406, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -42.29143524169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06413345783948898, + "rewards_train/margins": 0.16501007229089737, + "rewards_train/rejected": -0.22914353013038635, + "step": 815 + }, + { + "epoch": 0.23, + "learning_rate": 1.610552647114151e-06, + "loss": 0.3105, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -104.38301086425781, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -171.36666870117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.838301181793213, + "rewards_train/margins": 4.598365783691406, + "rewards_train/rejected": -7.436666965484619, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -30.76352882385254, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -26.054845809936523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8263528943061829, + "rewards_train/margins": -0.04586827754974365, + "rewards_train/rejected": -0.7804846167564392, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -69.17495727539062, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -103.29061126708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.317495733499527, + "rewards_train/margins": 1.9615653455257416, + "rewards_train/rejected": -2.2790610790252686, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -113.80799865722656, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -121.33575439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7807998657226562, + "rewards_train/margins": 0.8027756214141846, + "rewards_train/rejected": -2.583575487136841, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -40.03525924682617, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -24.81111717224121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7285259366035461, + "rewards_train/margins": 0.190085768699646, + "rewards_train/rejected": -0.9186117053031921, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -169.99575805664062, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -188.08993530273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.149575710296631, + "rewards_train/margins": 1.3594179153442383, + "rewards_train/rejected": -6.508993625640869, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -142.45684814453125, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -163.3416748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.99568510055542, + "rewards_train/margins": 0.288482666015625, + "rewards_train/rejected": -5.284167766571045, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -123.44780731201172, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -128.22171020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7947807312011719, + "rewards_train/margins": 2.527390480041504, + "rewards_train/rejected": -4.322171211242676, + "step": 816 + }, + { + "epoch": 0.23, + "logps_train/chosen": -9.076894760131836, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -23.610506057739258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12643948197364807, + "rewards_train/margins": 1.2658611238002777, + "rewards_train/rejected": -1.3923006057739258, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -23.08635902404785, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -21.785749435424805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1711359024047852, + "rewards_train/margins": -0.09881091117858887, + "rewards_train/rejected": -1.0723249912261963, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -80.92835235595703, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -114.73722076416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7928352355957031, + "rewards_train/margins": 2.2808868885040283, + "rewards_train/rejected": -3.0737221240997314, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -17.457592010498047, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -33.98313522338867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15424080193042755, + "rewards_train/margins": 2.3775543719530106, + "rewards_train/rejected": -2.223313570022583, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -162.1077423095703, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -160.9013214111328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.660774230957031, + "rewards_train/margins": -0.72064208984375, + "rewards_train/rejected": -3.9401321411132812, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -39.49116516113281, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -26.41259765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.599116563796997, + "rewards_train/margins": -1.001606822013855, + "rewards_train/rejected": -1.597509741783142, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -89.31585693359375, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -145.5847930908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.26841431856155396, + "rewards_train/margins": 5.3768938183784485, + "rewards_train/rejected": -5.1084794998168945, + "step": 817 + }, + { + "epoch": 0.23, + "logps_train/chosen": -195.53192138671875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -201.7528533935547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.903192043304443, + "rewards_train/margins": -0.7279067039489746, + "rewards_train/rejected": -7.175285339355469, + "step": 817 + }, + { + "epoch": 0.23, + "learning_rate": 1.6084553003555133e-06, + "loss": 0.4645, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -15.83985710144043, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -22.116783142089844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.099610686302185, + "rewards_train/margins": -0.3129323720932007, + "rewards_train/rejected": -0.7866783142089844, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -65.22351837158203, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -31.987646102905273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1776481717824936, + "rewards_train/margins": 2.188912734389305, + "rewards_train/rejected": -2.0112645626068115, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -27.156539916992188, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -16.452163696289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1719039678573608, + "rewards_train/margins": 0.0045623779296875, + "rewards_train/rejected": -1.1764663457870483, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -40.16350173950195, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -24.200462341308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6538501977920532, + "rewards_train/margins": 0.269321084022522, + "rewards_train/rejected": -1.9231712818145752, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -146.26734924316406, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -175.8935089111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9267349243164062, + "rewards_train/margins": 1.66261625289917, + "rewards_train/rejected": -4.589351177215576, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -12.743663787841797, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -1.7890625, + "logps_train/rejected": -8.593321800231934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3368663787841797, + "rewards_train/margins": 0.3435595631599426, + "rewards_train/rejected": -0.6804259419441223, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -101.083251953125, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -221.04196166992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9583252668380737, + "rewards_train/margins": 6.2458707094192505, + "rewards_train/rejected": -8.204195976257324, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -16.22455596923828, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -11.584728240966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25370559096336365, + "rewards_train/margins": 0.596954733133316, + "rewards_train/rejected": -0.8506603240966797, + "step": 818 + }, + { + "epoch": 0.23, + "logps_train/chosen": -7.252846717834473, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -0.9375, + "logps_train/rejected": -11.862587928771973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4955971837043762, + "rewards_train/margins": 0.596911609172821, + "rewards_train/rejected": -1.0925087928771973, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -107.73544311523438, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -220.40513610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2735443115234375, + "rewards_train/margins": 5.166969299316406, + "rewards_train/rejected": -7.440513610839844, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -14.427454948425293, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -9.041786193847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8927454948425293, + "rewards_train/margins": -0.39481687545776367, + "rewards_train/rejected": -0.4979286193847656, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -44.875221252441406, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -25.65497398376465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3375221490859985, + "rewards_train/margins": 0.02172529697418213, + "rewards_train/rejected": -1.3592474460601807, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -154.0667724609375, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -174.14950561523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.106677293777466, + "rewards_train/margins": 4.308273553848267, + "rewards_train/rejected": -7.414950847625732, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -179.9402618408203, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -174.4249725341797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.894026279449463, + "rewards_train/margins": -0.3515291213989258, + "rewards_train/rejected": -6.542497158050537, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -12.230377197265625, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -12.214152336120605, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9371002316474915, + "rewards_train/margins": -0.18443500995635986, + "rewards_train/rejected": -0.7526652216911316, + "step": 819 + }, + { + "epoch": 0.23, + "logps_train/chosen": -16.87880516052246, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -48.53657531738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5128805041313171, + "rewards_train/margins": 1.9407771229743958, + "rewards_train/rejected": -2.453657627105713, + "step": 819 + }, + { + "epoch": 0.23, + "learning_rate": 1.6063536950548825e-06, + "loss": 0.452, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -132.16676330566406, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -152.76138305664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7666763663291931, + "rewards_train/margins": 1.2094619870185852, + "rewards_train/rejected": -1.9761383533477783, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -128.92140197753906, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -119.7662124633789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4421402215957642, + "rewards_train/margins": -0.9155189394950867, + "rewards_train/rejected": -0.5266212821006775, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -38.13634490966797, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -83.6456298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8886345028877258, + "rewards_train/margins": 1.2259284853935242, + "rewards_train/rejected": -2.11456298828125, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -6.518383979797363, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -13.561508178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12058839946985245, + "rewards_train/margins": 0.42931243032217026, + "rewards_train/rejected": -0.5499008297920227, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -224.7802734375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -223.09262084960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.828027725219727, + "rewards_train/margins": 0.18123435974121094, + "rewards_train/rejected": -10.009262084960938, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -110.86259460449219, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -128.72662353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6362594366073608, + "rewards_train/margins": 0.7864030599594116, + "rewards_train/rejected": -2.4226624965667725, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -127.12433624267578, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -139.5011749267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8124336004257202, + "rewards_train/margins": 1.0376838445663452, + "rewards_train/rejected": -2.8501174449920654, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -90.92125701904297, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -111.73896789550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6421257257461548, + "rewards_train/margins": 1.181771159172058, + "rewards_train/rejected": -2.823896884918213, + "step": 820 + }, + { + "epoch": 0.23, + "logps_train/chosen": -300.5879821777344, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -281.4654541015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.658798217773438, + "rewards_train/margins": -0.7122526168823242, + "rewards_train/rejected": -12.946545600891113, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -12.76468276977539, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -15.74884033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.610843300819397, + "rewards_train/margins": 0.42966580390930176, + "rewards_train/rejected": -1.0405091047286987, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -7.533564567565918, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -9.229496002197266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2783564627170563, + "rewards_train/margins": -0.00853186845779419, + "rewards_train/rejected": -0.2698245942592621, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -11.688239097595215, + "logps_train/ref_chosen": -2.359375, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -27.4521427154541, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9328864216804504, + "rewards_train/margins": 0.5498278737068176, + "rewards_train/rejected": -1.482714295387268, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -118.72029876708984, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -117.59229278564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7220299243927, + "rewards_train/margins": 0.9871993064880371, + "rewards_train/rejected": -3.7092292308807373, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -79.58495330810547, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -85.99385070800781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.3915046751499176, + "rewards_train/margins": -0.05911025404930115, + "rewards_train/rejected": 0.45061492919921875, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -17.07444953918457, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -79.97457885742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2293199300765991, + "rewards_train/margins": -0.9318620264530182, + "rewards_train/rejected": -0.29745790362358093, + "step": 821 + }, + { + "epoch": 0.23, + "logps_train/chosen": -61.956825256347656, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -12.853851318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2456825226545334, + "rewards_train/margins": 0.39595259726047516, + "rewards_train/rejected": -0.6416351199150085, + "step": 821 + }, + { + "epoch": 0.23, + "learning_rate": 1.6042478459212668e-06, + "loss": 0.5872, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -177.505859375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -248.3184814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4505860805511475, + "rewards_train/margins": 1.481261968612671, + "rewards_train/rejected": -4.931848049163818, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -70.88467407226562, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -126.15438842773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.388467401266098, + "rewards_train/margins": 3.0269714891910553, + "rewards_train/rejected": -3.4154388904571533, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -55.12070083618164, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -51.10856628417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3120701014995575, + "rewards_train/margins": 2.473786622285843, + "rewards_train/rejected": -2.7858567237854004, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -53.4227294921875, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -22.011188507080078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.192272901535034, + "rewards_train/margins": -0.45365405082702637, + "rewards_train/rejected": -1.7386188507080078, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -76.22642517089844, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -126.24633026123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8976424932479858, + "rewards_train/margins": 4.076990723609924, + "rewards_train/rejected": -5.97463321685791, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -0.42885729670524597, + "logps_train/ref_chosen": -0.62890625, + "logps_train/ref_rejected": -0.62890625, + "logps_train/rejected": -0.427143394947052, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.020004896447062492, + "rewards_train/margins": -0.00017138943076133728, + "rewards_train/rejected": 0.02017628587782383, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -13.63072395324707, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -15.891880989074707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28182241320610046, + "rewards_train/margins": 0.46986570954322815, + "rewards_train/rejected": -0.7516881227493286, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -172.65936279296875, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -185.84268188476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2659363746643066, + "rewards_train/margins": -0.18166804313659668, + "rewards_train/rejected": -3.08426833152771, + "step": 822 + }, + { + "epoch": 0.23, + "logps_train/chosen": -155.03594970703125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -196.18881225585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3035950660705566, + "rewards_train/margins": 2.815286159515381, + "rewards_train/rejected": -6.1188812255859375, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -101.4917984008789, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -163.17532348632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4991798400878906, + "rewards_train/margins": 2.2183525562286377, + "rewards_train/rejected": -2.7175323963165283, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -7.031209945678711, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -23.79134750366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009370994754135609, + "rewards_train/margins": 1.319763827137649, + "rewards_train/rejected": -1.3291348218917847, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -128.79226684570312, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -167.6335906982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4292266368865967, + "rewards_train/margins": 3.1841323375701904, + "rewards_train/rejected": -5.613358974456787, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -177.67538452148438, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -206.45643615722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.967538595199585, + "rewards_train/margins": 1.7781050205230713, + "rewards_train/rejected": -5.745643615722656, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -119.26986694335938, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -146.02374267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8269867300987244, + "rewards_train/margins": 2.925387680530548, + "rewards_train/rejected": -3.7523744106292725, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -24.1013126373291, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -48.0997314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2851312756538391, + "rewards_train/margins": 2.3498420119285583, + "rewards_train/rejected": -2.6349732875823975, + "step": 823 + }, + { + "epoch": 0.23, + "logps_train/chosen": -147.92791748046875, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -206.69847106933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5927917957305908, + "rewards_train/margins": 6.077055215835571, + "rewards_train/rejected": -7.669847011566162, + "step": 823 + }, + { + "epoch": 0.23, + "learning_rate": 1.6021377676933782e-06, + "loss": 0.2502, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -79.39993286132812, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -129.1914825439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2100067138671875, + "rewards_train/margins": 0.9791550040245056, + "rewards_train/rejected": -0.7691482901573181, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -23.846019744873047, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -60.05110168457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2346019744873047, + "rewards_train/margins": 1.9955081939697266, + "rewards_train/rejected": -2.2301101684570312, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -20.416263580322266, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -1.953125, + "logps_train/rejected": -10.964343070983887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24162636697292328, + "rewards_train/margins": 0.6594954282045364, + "rewards_train/rejected": -0.9011217951774597, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -187.79922485351562, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -116.82275390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.37992262840271, + "rewards_train/margins": -1.4976471662521362, + "rewards_train/rejected": -1.8822754621505737, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -173.6608428955078, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -203.79676818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.366084575653076, + "rewards_train/margins": 4.41359281539917, + "rewards_train/rejected": -9.779677391052246, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -154.4085693359375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -190.03536987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.94085693359375, + "rewards_train/margins": 4.412680149078369, + "rewards_train/rejected": -6.353537082672119, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -11.783431053161621, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -12.02291202545166, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8158431053161621, + "rewards_train/margins": -0.0635519027709961, + "rewards_train/rejected": -0.752291202545166, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -220.83206176757812, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -223.2091064453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.183206558227539, + "rewards_train/margins": -0.8622956275939941, + "rewards_train/rejected": -7.320910930633545, + "step": 824 + }, + { + "epoch": 0.23, + "logps_train/chosen": -31.948036193847656, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -35.04097366333008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36980363726615906, + "rewards_train/margins": 0.9217937290668488, + "rewards_train/rejected": -1.2915973663330078, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -6.991029262542725, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -20.8559513092041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28972792625427246, + "rewards_train/margins": 0.8583672046661377, + "rewards_train/rejected": -1.1480951309204102, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -82.63468933105469, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -85.44769287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9134689569473267, + "rewards_train/margins": 0.7813003063201904, + "rewards_train/rejected": -1.694769263267517, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -27.750118255615234, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -20.985515594482422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5937618017196655, + "rewards_train/margins": -0.48896026611328125, + "rewards_train/rejected": -1.1048015356063843, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -111.17388916015625, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -182.4878387451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.917388916015625, + "rewards_train/margins": 2.631394863128662, + "rewards_train/rejected": -4.548783779144287, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -3.175795316696167, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -10.794914245605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0589829683303833, + "rewards_train/margins": 0.4384743869304657, + "rewards_train/rejected": -0.3794914186000824, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -102.57962036132812, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -160.60433959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6579620242118835, + "rewards_train/margins": 3.9524720311164856, + "rewards_train/rejected": -4.610434055328369, + "step": 825 + }, + { + "epoch": 0.23, + "logps_train/chosen": -25.726707458496094, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -16.85927391052246, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7476707696914673, + "rewards_train/margins": -0.09299337863922119, + "rewards_train/rejected": -0.6546773910522461, + "step": 825 + }, + { + "epoch": 0.23, + "learning_rate": 1.6000234751395267e-06, + "loss": 0.493, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -71.31614685058594, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -59.53015899658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2683853209018707, + "rewards_train/margins": 2.246401220560074, + "rewards_train/rejected": -1.9780158996582031, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -23.784687042236328, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -20.938419342041016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8409687280654907, + "rewards_train/margins": -0.10962677001953125, + "rewards_train/rejected": -0.7313419580459595, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -139.9832000732422, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -140.22512817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9483200311660767, + "rewards_train/margins": 0.02419281005859375, + "rewards_train/rejected": -1.9725128412246704, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -66.73175048828125, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -117.51050567626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4231750965118408, + "rewards_train/margins": 3.6778757572174072, + "rewards_train/rejected": -5.101050853729248, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -125.72359466552734, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -159.739013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.022359609603882, + "rewards_train/margins": 2.00154185295105, + "rewards_train/rejected": -4.023901462554932, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -122.35331726074219, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -157.93472290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5853317975997925, + "rewards_train/margins": 3.3581405878067017, + "rewards_train/rejected": -4.943472385406494, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -26.904966354370117, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -27.245193481445312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7904967069625854, + "rewards_train/margins": -1.0159773230552673, + "rewards_train/rejected": -0.7745193839073181, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -41.25175094604492, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -91.9327163696289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.25017511844635, + "rewards_train/margins": 2.0680965185165405, + "rewards_train/rejected": -3.3182716369628906, + "step": 826 + }, + { + "epoch": 0.23, + "logps_train/chosen": -51.85684585571289, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -93.22042846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2106845378875732, + "rewards_train/margins": 3.8113582134246826, + "rewards_train/rejected": -6.022042751312256, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -194.6581268310547, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -201.92202758789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.065812826156616, + "rewards_train/margins": 0.4263899326324463, + "rewards_train/rejected": -2.4922027587890625, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -8.835719108581543, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -23.45840072631836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10857190936803818, + "rewards_train/margins": 1.5278931632637978, + "rewards_train/rejected": -1.636465072631836, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -222.117919921875, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -246.47650146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.2117919921875, + "rewards_train/margins": 2.2358579635620117, + "rewards_train/rejected": -9.447649955749512, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -79.28759765625, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -217.19740295410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42875978350639343, + "rewards_train/margins": 13.74098089337349, + "rewards_train/rejected": -14.169740676879883, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -39.60264587402344, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -28.40756607055664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5477646589279175, + "rewards_train/margins": -0.3695080280303955, + "rewards_train/rejected": -1.178256630897522, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -127.57185363769531, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -243.3243408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1071853637695312, + "rewards_train/margins": 6.625248908996582, + "rewards_train/rejected": -9.732434272766113, + "step": 827 + }, + { + "epoch": 0.23, + "logps_train/chosen": -21.446748733520508, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -38.710479736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7696748971939087, + "rewards_train/margins": 0.17637306451797485, + "rewards_train/rejected": -0.9460479617118835, + "step": 827 + }, + { + "epoch": 0.23, + "learning_rate": 1.5979049830575188e-06, + "loss": 0.343, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -235.85934448242188, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -194.9724884033203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.385934829711914, + "rewards_train/margins": -1.3386859893798828, + "rewards_train/rejected": -9.047248840332031, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -4.888455390930176, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -7.7819719314575195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12634554505348206, + "rewards_train/margins": 0.21747666597366333, + "rewards_train/rejected": -0.3438222110271454, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -20.91463851928711, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -18.36118507385254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8477138876914978, + "rewards_train/margins": -0.5365953743457794, + "rewards_train/rejected": -0.3111185133457184, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -50.158287048339844, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -134.9288787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1408287137746811, + "rewards_train/margins": 2.502059116959572, + "rewards_train/rejected": -2.642887830734253, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -1.2282544374465942, + "logps_train/ref_chosen": -0.470703125, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -3.034329414367676, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07575513422489166, + "rewards_train/margins": -0.1379471942782402, + "rewards_train/rejected": 0.06219206005334854, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -209.32994079589844, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -207.00845336914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.032994031906128, + "rewards_train/margins": 1.8678514957427979, + "rewards_train/rejected": -4.900845527648926, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -37.06856155395508, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -51.027130126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4568561613559723, + "rewards_train/margins": 1.508356899023056, + "rewards_train/rejected": -1.9652130603790283, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -38.529457092285156, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -26.818859100341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5404456853866577, + "rewards_train/margins": 0.12894022464752197, + "rewards_train/rejected": -1.6693859100341797, + "step": 828 + }, + { + "epoch": 0.23, + "logps_train/chosen": -216.82777404785156, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -604.0, + "logps_train/rejected": -755.9347534179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.98277759552002, + "rewards_train/margins": 6.210698127746582, + "rewards_train/rejected": -15.193475723266602, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -78.29733276367188, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -113.44717407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02026672475039959, + "rewards_train/margins": 1.81498415581882, + "rewards_train/rejected": -1.7947174310684204, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -144.84568786621094, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -222.31216430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8345688581466675, + "rewards_train/margins": 4.796647667884827, + "rewards_train/rejected": -6.631216526031494, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -4.501511573791504, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -6.278576374053955, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15015116333961487, + "rewards_train/margins": 0.18395647406578064, + "rewards_train/rejected": -0.3341076374053955, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -76.67977905273438, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -107.46168518066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6179779171943665, + "rewards_train/margins": 4.128190696239471, + "rewards_train/rejected": -4.746168613433838, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -260.136962890625, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -259.25531005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -13.4136962890625, + "rewards_train/margins": 0.161834716796875, + "rewards_train/rejected": -13.575531005859375, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -91.01856994628906, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -177.26382446289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.251857042312622, + "rewards_train/margins": 3.5745255947113037, + "rewards_train/rejected": -4.826382637023926, + "step": 829 + }, + { + "epoch": 0.23, + "logps_train/chosen": -32.793670654296875, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -32.760868072509766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8293671011924744, + "rewards_train/margins": -0.0032802820205688477, + "rewards_train/rejected": -0.8260868191719055, + "step": 829 + }, + { + "epoch": 0.23, + "learning_rate": 1.5957823062745528e-06, + "loss": 0.4435, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -179.2376708984375, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -258.40625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3237671852111816, + "rewards_train/margins": 5.416857719421387, + "rewards_train/rejected": -7.740624904632568, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -131.6866455078125, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -250.63662719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.518664598464966, + "rewards_train/margins": 10.294997930526733, + "rewards_train/rejected": -12.8136625289917, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -96.63642883300781, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -244.79183959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.113642930984497, + "rewards_train/margins": 10.515540838241577, + "rewards_train/rejected": -12.629183769226074, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -144.79788208007812, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -229.89935302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.229788303375244, + "rewards_train/margins": 3.7601475715637207, + "rewards_train/rejected": -8.989935874938965, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -6.381062030792236, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -1.3203125, + "logps_train/rejected": -6.806816577911377, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09435620158910751, + "rewards_train/margins": 0.45429424196481705, + "rewards_train/rejected": -0.5486504435539246, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -1.536392331123352, + "logps_train/ref_chosen": -1.515625, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -13.02737808227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002076733158901334, + "rewards_train/margins": 0.6912860989104956, + "rewards_train/rejected": -0.693362832069397, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -182.6695556640625, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -143.50906372070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.766955614089966, + "rewards_train/margins": -0.01604914665222168, + "rewards_train/rejected": -2.750906467437744, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -145.77584838867188, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -129.51004028320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8275848627090454, + "rewards_train/margins": 2.7734192609786987, + "rewards_train/rejected": -4.601004123687744, + "step": 830 + }, + { + "epoch": 0.23, + "logps_train/chosen": -169.08685302734375, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -318.92919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9086854457855225, + "rewards_train/margins": 9.88423466682434, + "rewards_train/rejected": -12.792920112609863, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -0.14551618695259094, + "logps_train/ref_chosen": -0.2216796875, + "logps_train/ref_rejected": -0.2216796875, + "logps_train/rejected": -0.1392512321472168, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007616349961608648, + "rewards_train/margins": -0.000626495573669672, + "rewards_train/rejected": 0.00824284553527832, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -22.3808650970459, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -54.548439025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9193364977836609, + "rewards_train/margins": 1.8355074524879456, + "rewards_train/rejected": -2.7548439502716064, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -99.90624237060547, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -103.92747497558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8906242847442627, + "rewards_train/margins": -1.5478767156600952, + "rewards_train/rejected": -1.3427475690841675, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -169.37965393066406, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -267.6690673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.137965202331543, + "rewards_train/margins": 6.078941345214844, + "rewards_train/rejected": -14.216906547546387, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -9.718059539794922, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -18.700580596923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.628055989742279, + "rewards_train/margins": 0.9451270699501038, + "rewards_train/rejected": -1.5731830596923828, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -25.851177215576172, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -23.837390899658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3663676977157593, + "rewards_train/margins": 0.12362146377563477, + "rewards_train/rejected": -1.489989161491394, + "step": 831 + }, + { + "epoch": 0.23, + "logps_train/chosen": -134.421875, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -187.29051208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.692187488079071, + "rewards_train/margins": 0.6368637681007385, + "rewards_train/rejected": -1.3290512561798096, + "step": 831 + }, + { + "epoch": 0.23, + "learning_rate": 1.5936554596471154e-06, + "loss": 0.3536, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -191.25332641601562, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -231.55780029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.225332736968994, + "rewards_train/margins": 1.5304474830627441, + "rewards_train/rejected": -7.755780220031738, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -32.86172866821289, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -12.921634674072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4861728847026825, + "rewards_train/margins": 0.22786560654640198, + "rewards_train/rejected": -0.7140384912490845, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -19.906597137451172, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -14.06097412109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1469097137451172, + "rewards_train/margins": -0.15956228971481323, + "rewards_train/rejected": -0.987347424030304, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -246.31614685058594, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -309.892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.131614685058594, + "rewards_train/margins": 2.957643508911133, + "rewards_train/rejected": -13.089258193969727, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -44.003623962402344, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -80.24772644042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2753623723983765, + "rewards_train/margins": 2.824410557746887, + "rewards_train/rejected": -4.099772930145264, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -206.4176483154297, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -216.4694366455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.041764736175537, + "rewards_train/margins": 1.7051787376403809, + "rewards_train/rejected": -8.746943473815918, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -1.6550904512405396, + "logps_train/ref_chosen": -0.953125, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -23.139266967773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07019654661417007, + "rewards_train/margins": 1.7031051740050316, + "rewards_train/rejected": -1.7733017206192017, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -11.573519706726074, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -8.774045944213867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01985197141766548, + "rewards_train/margins": -0.16744737699627876, + "rewards_train/rejected": 0.14759540557861328, + "step": 832 + }, + { + "epoch": 0.23, + "logps_train/chosen": -15.080456733703613, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -7.599421501159668, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3392956852912903, + "rewards_train/margins": -0.22310353070497513, + "rewards_train/rejected": -0.11619215458631516, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -115.66148376464844, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -168.51116943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.516148567199707, + "rewards_train/margins": 1.284968376159668, + "rewards_train/rejected": -5.801116943359375, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -128.55484008789062, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -168.23329162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05548400804400444, + "rewards_train/margins": 3.2678452022373676, + "rewards_train/rejected": -3.323329210281372, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -123.51519012451172, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -96.39846801757812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.05151891708374, + "rewards_train/margins": -1.1116721630096436, + "rewards_train/rejected": -2.9398467540740967, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -8.34549331665039, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -10.157325744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.265799343585968, + "rewards_train/margins": 0.18743324279785156, + "rewards_train/rejected": -0.4532325863838196, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -52.430076599121094, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -42.33590316772461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018007660284638405, + "rewards_train/margins": 0.4405826684087515, + "rewards_train/rejected": -0.4585903286933899, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -44.4268913269043, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -84.68766784667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3176891505718231, + "rewards_train/margins": 1.1510776579380035, + "rewards_train/rejected": -1.4687668085098267, + "step": 833 + }, + { + "epoch": 0.23, + "logps_train/chosen": -69.56410217285156, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -127.5766830444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0064103603363037, + "rewards_train/margins": 2.6512582302093506, + "rewards_train/rejected": -5.657668590545654, + "step": 833 + }, + { + "epoch": 0.23, + "learning_rate": 1.5915244580608787e-06, + "loss": 0.4195, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -176.46705627441406, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -71.8199462890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4467055797576904, + "rewards_train/margins": -0.28971099853515625, + "rewards_train/rejected": -2.156994581222534, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -14.276006698608398, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -48.14867401123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5338506698608398, + "rewards_train/margins": 2.006016731262207, + "rewards_train/rejected": -2.539867401123047, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -364.86029052734375, + "logps_train/ref_chosen": -239.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -333.6747741699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.586029052734375, + "rewards_train/margins": 0.8814487457275391, + "rewards_train/rejected": -13.467477798461914, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -66.72321319580078, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -94.0950698852539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12232132256031036, + "rewards_train/margins": 0.43718568980693817, + "rewards_train/rejected": -0.5595070123672485, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -0.480779230594635, + "logps_train/ref_chosen": -0.416015625, + "logps_train/ref_rejected": -0.416015625, + "logps_train/rejected": -0.48072683811187744, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006476360838860273, + "rewards_train/margins": -5.239620804786682e-06, + "rewards_train/rejected": -0.006471121218055487, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -6.894150257110596, + "logps_train/ref_chosen": -1.3046875, + "logps_train/ref_rejected": -1.1328125, + "logps_train/rejected": -2.2698991298675537, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5589463114738464, + "rewards_train/margins": -0.4452376440167427, + "rewards_train/rejected": -0.11370866745710373, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -20.595407485961914, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -17.11858558654785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5782907605171204, + "rewards_train/margins": -0.1664322018623352, + "rewards_train/rejected": -0.41185855865478516, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -16.000444412231445, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -18.843507766723633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9375444650650024, + "rewards_train/margins": 0.44368135929107666, + "rewards_train/rejected": -1.381225824356079, + "step": 834 + }, + { + "epoch": 0.23, + "logps_train/chosen": -25.053081512451172, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -22.84233856201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7053081393241882, + "rewards_train/margins": 0.6039257645606995, + "rewards_train/rejected": -1.3092339038848877, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -38.89356231689453, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -23.472496032714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.639356255531311, + "rewards_train/margins": -0.10460662841796875, + "rewards_train/rejected": -1.5347496271133423, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -21.998823165893555, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -15.17198657989502, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29988232254981995, + "rewards_train/margins": -0.21393366158008575, + "rewards_train/rejected": -0.08594866096973419, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -11.622102737426758, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -7.818079948425293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1184602752327919, + "rewards_train/margins": 0.13834773749113083, + "rewards_train/rejected": -0.25680801272392273, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -35.43979263305664, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -36.10268020629883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21897926926612854, + "rewards_train/margins": 0.1912887692451477, + "rewards_train/rejected": -0.41026803851127625, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -10.609938621520996, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -24.73771858215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02025613747537136, + "rewards_train/margins": 1.2252779956907034, + "rewards_train/rejected": -1.205021858215332, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -118.06758117675781, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -175.44007873535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0067581175826489925, + "rewards_train/margins": 5.937249755952507, + "rewards_train/rejected": -5.944007873535156, + "step": 835 + }, + { + "epoch": 0.23, + "logps_train/chosen": -153.38861083984375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -212.1388397216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5888612270355225, + "rewards_train/margins": 4.425023317337036, + "rewards_train/rejected": -8.013884544372559, + "step": 835 + }, + { + "epoch": 0.23, + "learning_rate": 1.5893893164305938e-06, + "loss": 0.5127, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -139.45399475097656, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -128.49472045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.395399570465088, + "rewards_train/margins": 1.7040724754333496, + "rewards_train/rejected": -4.0994720458984375, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -153.855712890625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -230.46499633789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.135571241378784, + "rewards_train/margins": 6.410928964614868, + "rewards_train/rejected": -9.546500205993652, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -88.73794555664062, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -99.22036743164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0737946033477783, + "rewards_train/margins": -0.0017578601837158203, + "rewards_train/rejected": -2.0720367431640625, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -95.35795593261719, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -161.09231567382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3857955932617188, + "rewards_train/margins": 6.023436069488525, + "rewards_train/rejected": -7.409231662750244, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -29.295146942138672, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -40.20332336425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27951470017433167, + "rewards_train/margins": 1.1283176839351654, + "rewards_train/rejected": -1.407832384109497, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -20.257633209228516, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -33.33415222167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5632633566856384, + "rewards_train/margins": 0.4701518416404724, + "rewards_train/rejected": -1.0334151983261108, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -189.81045532226562, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -301.74676513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.481045722961426, + "rewards_train/margins": 6.493631362915039, + "rewards_train/rejected": -11.974677085876465, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -283.86895751953125, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -294.83258056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.486895561218262, + "rewards_train/margins": 0.29636287689208984, + "rewards_train/rejected": -11.783258438110352, + "step": 836 + }, + { + "epoch": 0.23, + "logps_train/chosen": -173.0613250732422, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -207.69802856445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5061325430870056, + "rewards_train/margins": 5.063670217990875, + "rewards_train/rejected": -5.569802761077881, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -34.20901870727539, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -37.25, + "logps_train/rejected": -34.79228973388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25409814715385437, + "rewards_train/margins": 0.008327111601829529, + "rewards_train/rejected": 0.24577103555202484, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -185.82693481445312, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -251.17877197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.882693767547607, + "rewards_train/margins": 1.6351838111877441, + "rewards_train/rejected": -9.517877578735352, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -168.9611358642578, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -149.7315673828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.196113586425781, + "rewards_train/margins": -1.7729568481445312, + "rewards_train/rejected": -2.42315673828125, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -133.38673400878906, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -261.22064208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03867340087890625, + "rewards_train/margins": 8.383391380310059, + "rewards_train/rejected": -8.422064781188965, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -14.26648998260498, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -54.60355758666992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6453990340232849, + "rewards_train/margins": 1.3399567008018494, + "rewards_train/rejected": -1.9853557348251343, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -68.96428680419922, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -117.9203109741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5464286804199219, + "rewards_train/margins": 3.6456027030944824, + "rewards_train/rejected": -4.192031383514404, + "step": 837 + }, + { + "epoch": 0.23, + "logps_train/chosen": -93.66868591308594, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -124.70536804199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11686859279870987, + "rewards_train/margins": 1.5036682114005089, + "rewards_train/rejected": -1.6205368041992188, + "step": 837 + }, + { + "epoch": 0.23, + "learning_rate": 1.587250049699988e-06, + "loss": 0.3408, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -7.945680618286133, + "logps_train/ref_chosen": -1.9140625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -28.26349449157715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6031618118286133, + "rewards_train/margins": 1.2544376850128174, + "rewards_train/rejected": -1.8575994968414307, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -19.704710006713867, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -5.280803680419922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25797101855278015, + "rewards_train/margins": -0.34239064902067184, + "rewards_train/rejected": 0.0844196304678917, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -13.59889030456543, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -33.43583679199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5411390662193298, + "rewards_train/margins": 2.1618197560310364, + "rewards_train/rejected": -2.702958822250366, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -10.923468589782715, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -19.03323745727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18609686195850372, + "rewards_train/margins": 0.7109768837690353, + "rewards_train/rejected": -0.8970737457275391, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -177.71356201171875, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -191.01780700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.971356391906738, + "rewards_train/margins": 0.2304244041442871, + "rewards_train/rejected": -5.201780796051025, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -36.803218841552734, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -13.151449203491211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8678218722343445, + "rewards_train/margins": 0.07701057195663452, + "rewards_train/rejected": -0.944832444190979, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -12.489068984985352, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -21.840837478637695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12390690296888351, + "rewards_train/margins": 0.7351768687367439, + "rewards_train/rejected": -0.8590837717056274, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -46.09773254394531, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -51.958248138427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.434773325920105, + "rewards_train/margins": 1.1360515356063843, + "rewards_train/rejected": -2.5708248615264893, + "step": 838 + }, + { + "epoch": 0.23, + "logps_train/chosen": -157.70855712890625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -245.29550170898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.020855903625488, + "rewards_train/margins": 4.308694839477539, + "rewards_train/rejected": -8.329550743103027, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -76.4290771484375, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -42.00885009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49290773272514343, + "rewards_train/margins": 1.0204773247241974, + "rewards_train/rejected": -1.5133850574493408, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -24.301240921020508, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -44.390106201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18012408912181854, + "rewards_train/margins": 2.308886483311653, + "rewards_train/rejected": -2.4890105724334717, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -178.91458129882812, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -176.3842010498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.691458225250244, + "rewards_train/margins": 4.196961879730225, + "rewards_train/rejected": -6.888420104980469, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -31.322105407714844, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -20.01032829284668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6322105526924133, + "rewards_train/margins": 0.48132234811782837, + "rewards_train/rejected": -1.1135329008102417, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -113.52445983886719, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -126.74504852294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9524459838867188, + "rewards_train/margins": 1.1220588684082031, + "rewards_train/rejected": -3.074504852294922, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -189.44386291503906, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -187.68328857421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.544386386871338, + "rewards_train/margins": -0.07605743408203125, + "rewards_train/rejected": -5.468328952789307, + "step": 839 + }, + { + "epoch": 0.23, + "logps_train/chosen": -13.841976165771484, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -21.923110961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9466976523399353, + "rewards_train/margins": 0.40811342000961304, + "rewards_train/rejected": -1.3548110723495483, + "step": 839 + }, + { + "epoch": 0.23, + "learning_rate": 1.5851066728416616e-06, + "loss": 0.3744, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -85.89720916748047, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -103.54847717285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6397209167480469, + "rewards_train/margins": 1.565126895904541, + "rewards_train/rejected": -2.204847812652588, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -7.550819396972656, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -4.474514484405518, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17383193969726562, + "rewards_train/margins": 0.02205701172351837, + "rewards_train/rejected": -0.195888951420784, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -35.573081970214844, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -25.63134002685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.044808268547058, + "rewards_train/margins": 0.9152007102966309, + "rewards_train/rejected": -1.960008978843689, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -174.71115112304688, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -158.1873016357422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.921115398406982, + "rewards_train/margins": -0.10238504409790039, + "rewards_train/rejected": -5.818730354309082, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -175.84524536132812, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -217.4140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8845245838165283, + "rewards_train/margins": 4.956881761550903, + "rewards_train/rejected": -7.841406345367432, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -87.91963195800781, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -106.96533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5419632196426392, + "rewards_train/margins": 0.8545700311660767, + "rewards_train/rejected": -2.396533250808716, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -148.65634155273438, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -209.97216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1656341552734375, + "rewards_train/margins": 6.131582736968994, + "rewards_train/rejected": -7.297216892242432, + "step": 840 + }, + { + "epoch": 0.23, + "logps_train/chosen": -129.889892578125, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -113.76985931396484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2889893054962158, + "rewards_train/margins": 1.6879966259002686, + "rewards_train/rejected": -2.9769859313964844, + "step": 840 + }, + { + "epoch": 0.24, + "logps_train/chosen": -61.60717010498047, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -24.550315856933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6857170462608337, + "rewards_train/margins": 1.2099395394325256, + "rewards_train/rejected": -1.8956565856933594, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -88.64468383789062, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -161.71482849121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0644683837890625, + "rewards_train/margins": 5.1570143699646, + "rewards_train/rejected": -6.221482753753662, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -155.46890258789062, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -164.884521484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.196890354156494, + "rewards_train/margins": -0.4584379196166992, + "rewards_train/rejected": -5.738452434539795, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -210.29486083984375, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -192.10813903808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.029486179351807, + "rewards_train/margins": 1.0813279151916504, + "rewards_train/rejected": -6.110814094543457, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -16.792585372924805, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -0.796875, + "logps_train/rejected": -2.6252171993255615, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8480085730552673, + "rewards_train/margins": -0.6651743501424789, + "rewards_train/rejected": -0.1828342229127884, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -110.96591186523438, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -193.75503540039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3965911865234375, + "rewards_train/margins": 1.7789123058319092, + "rewards_train/rejected": -3.1755034923553467, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -103.66411590576172, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -193.76443481445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9164116382598877, + "rewards_train/margins": 4.960031747817993, + "rewards_train/rejected": -7.876443386077881, + "step": 841 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.400577545166016, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -36.050437927246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3213077485561371, + "rewards_train/margins": 2.505611091852188, + "rewards_train/rejected": -2.826918840408325, + "step": 841 + }, + { + "epoch": 0.24, + "learning_rate": 1.582959200856979e-06, + "loss": 0.3322, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -10.273082733154297, + "logps_train/ref_chosen": -1.890625, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -13.950101852416992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8382458090782166, + "rewards_train/margins": 0.19426435232162476, + "rewards_train/rejected": -1.0325101613998413, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -23.538978576660156, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -35.29147720336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0788978338241577, + "rewards_train/margins": 0.32524991035461426, + "rewards_train/rejected": -1.404147744178772, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -111.07842254638672, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -129.1278076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6578423976898193, + "rewards_train/margins": 0.004938364028930664, + "rewards_train/rejected": -3.66278076171875, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -106.19966125488281, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -172.47840881347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8199661374092102, + "rewards_train/margins": 1.2278746962547302, + "rewards_train/rejected": -2.0478408336639404, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -9.275126457214355, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -25.568050384521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7134501338005066, + "rewards_train/margins": 0.8246049284934998, + "rewards_train/rejected": -1.5380550622940063, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -160.652099609375, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -129.33807373046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1652100086212158, + "rewards_train/margins": -0.4814026355743408, + "rewards_train/rejected": -0.683807373046875, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -109.50392150878906, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -200.5191192626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3503923416137695, + "rewards_train/margins": 4.001520156860352, + "rewards_train/rejected": -8.351912498474121, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.87983512878418, + "logps_train/ref_chosen": -0.431640625, + "logps_train/ref_rejected": -0.431640625, + "logps_train/rejected": -13.255261421203613, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2448195219039917, + "rewards_train/margins": 0.03754258155822754, + "rewards_train/rejected": -1.2823621034622192, + "step": 842 + }, + { + "epoch": 0.24, + "logps_train/chosen": -9.431601524353027, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -13.67630386352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6650351881980896, + "rewards_train/margins": 0.011970221996307373, + "rewards_train/rejected": -0.677005410194397, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -79.53056335449219, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -97.00455474853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9530563354492188, + "rewards_train/margins": 1.6973991394042969, + "rewards_train/rejected": -3.6504554748535156, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -206.8644561767578, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -145.5792236328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.986445903778076, + "rewards_train/margins": -2.578523635864258, + "rewards_train/rejected": -4.407922267913818, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -24.9440975189209, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -25.298030853271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3694097697734833, + "rewards_train/margins": 1.3228932917118073, + "rewards_train/rejected": -1.6923030614852905, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -20.01311492919922, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -9.102304458618164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4731864929199219, + "rewards_train/margins": -0.7277997732162476, + "rewards_train/rejected": -0.7453867197036743, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -13.011313438415527, + "logps_train/ref_chosen": -1.953125, + "logps_train/ref_rejected": -0.337890625, + "logps_train/rejected": -9.135689735412598, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1058188676834106, + "rewards_train/margins": -0.22603893280029297, + "rewards_train/rejected": -0.8797799348831177, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -38.92400360107422, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -52.14092254638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.442400336265564, + "rewards_train/margins": 1.571691870689392, + "rewards_train/rejected": -3.014092206954956, + "step": 843 + }, + { + "epoch": 0.24, + "logps_train/chosen": -62.460899353027344, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -50.91948699951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3039100766181946, + "rewards_train/margins": 1.7958588004112244, + "rewards_train/rejected": -1.4919487237930298, + "step": 843 + }, + { + "epoch": 0.24, + "learning_rate": 1.5808076487759677e-06, + "loss": 0.6332, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -46.62101745605469, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -108.67050170898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5371017456054688, + "rewards_train/margins": 2.8299484252929688, + "rewards_train/rejected": -3.3670501708984375, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -8.704684257507324, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -6.368875503540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0642184242606163, + "rewards_train/margins": 0.37266913801431656, + "rewards_train/rejected": -0.43688756227493286, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -10.292987823486328, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -18.866046905517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05429878458380699, + "rewards_train/margins": 0.8260559178888798, + "rewards_train/rejected": -0.8803547024726868, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -157.22584533691406, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -146.87020874023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.572584629058838, + "rewards_train/margins": 2.314436435699463, + "rewards_train/rejected": -5.887021064758301, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -112.59080505371094, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -150.78521728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7090805172920227, + "rewards_train/margins": 2.319441258907318, + "rewards_train/rejected": -3.028521776199341, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -115.71895599365234, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -200.61752319335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5718955993652344, + "rewards_train/margins": 4.5898566246032715, + "rewards_train/rejected": -7.161752223968506, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -43.86668014526367, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -35.43549346923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4991681575775146, + "rewards_train/margins": 0.4318811893463135, + "rewards_train/rejected": -2.931049346923828, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -44.43860626220703, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -41.81693649291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.568860650062561, + "rewards_train/margins": 0.8753330707550049, + "rewards_train/rejected": -1.444193720817566, + "step": 844 + }, + { + "epoch": 0.24, + "logps_train/chosen": -27.972148895263672, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -11.946287155151367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3972148895263672, + "rewards_train/margins": -0.015086174011230469, + "rewards_train/rejected": -0.3821287155151367, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -9.832645416259766, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -34.21880340576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03951454162597656, + "rewards_train/margins": 1.7823657989501953, + "rewards_train/rejected": -1.8218803405761719, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -141.20228576660156, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -285.4971923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0702285766601562, + "rewards_train/margins": 9.079490661621094, + "rewards_train/rejected": -11.14971923828125, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -117.06671142578125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -147.8883056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6066712141036987, + "rewards_train/margins": 3.082159638404846, + "rewards_train/rejected": -4.688830852508545, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -89.2910385131836, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -151.94805908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.129103899002075, + "rewards_train/margins": 2.565702199935913, + "rewards_train/rejected": -5.694806098937988, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -108.52285766601562, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -144.25096130371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5522857904434204, + "rewards_train/margins": 2.372810482978821, + "rewards_train/rejected": -3.925096273422241, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -88.43612670898438, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -86.38377380371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.493612676858902, + "rewards_train/margins": -0.0052352845668792725, + "rewards_train/rejected": -0.4883773922920227, + "step": 845 + }, + { + "epoch": 0.24, + "logps_train/chosen": -187.88818359375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -148.9935302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.038818359375, + "rewards_train/margins": 0.06053495407104492, + "rewards_train/rejected": -7.099353313446045, + "step": 845 + }, + { + "epoch": 0.24, + "learning_rate": 1.5786520316572107e-06, + "loss": 0.2759, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -104.66957092285156, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -144.07814025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.26695716381073, + "rewards_train/margins": 1.9408568143844604, + "rewards_train/rejected": -3.2078139781951904, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -32.06719207763672, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -18.277027130126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0129692554473877, + "rewards_train/margins": -0.41964149475097656, + "rewards_train/rejected": -1.5933277606964111, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -26.3560791015625, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -1.4453125, + "logps_train/rejected": -15.286334991455078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.523107886314392, + "rewards_train/margins": -0.1390056610107422, + "rewards_train/rejected": -1.38410222530365, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -184.7484130859375, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -186.70437622070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.824841499328613, + "rewards_train/margins": -2.804403781890869, + "rewards_train/rejected": -6.020437717437744, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -108.95258331298828, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -168.1226043701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.245258331298828, + "rewards_train/margins": 3.2670021057128906, + "rewards_train/rejected": -5.512260437011719, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -118.91226959228516, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -110.77680206298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3412269353866577, + "rewards_train/margins": 1.1864532232284546, + "rewards_train/rejected": -2.5276801586151123, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -182.17483520507812, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -200.82272338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9174835681915283, + "rewards_train/margins": 2.364788770675659, + "rewards_train/rejected": -6.2822723388671875, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -11.493257522583008, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -11.221506118774414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5868257880210876, + "rewards_train/margins": -0.10217517614364624, + "rewards_train/rejected": -0.4846506118774414, + "step": 846 + }, + { + "epoch": 0.24, + "logps_train/chosen": -23.46967124938965, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -29.417774200439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5844671130180359, + "rewards_train/margins": 0.3448103070259094, + "rewards_train/rejected": -0.9292774200439453, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -6.752764701843262, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -16.777755737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0028485299553722143, + "rewards_train/margins": 0.5368741394486278, + "rewards_train/rejected": -0.5340256094932556, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -59.55615234375, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -20.09323501586914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.405615210533142, + "rewards_train/margins": -0.0587916374206543, + "rewards_train/rejected": -1.3468235731124878, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -155.91497802734375, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -193.61581420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.691497802734375, + "rewards_train/margins": 4.270083904266357, + "rewards_train/rejected": -6.961581707000732, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -133.22845458984375, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -162.4991455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9728455543518066, + "rewards_train/margins": 1.0770692825317383, + "rewards_train/rejected": -5.049914836883545, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -104.95714569091797, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -73.50119018554688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4457145929336548, + "rewards_train/margins": -1.2455955743789673, + "rewards_train/rejected": -0.2001190185546875, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -3.5901834964752197, + "logps_train/ref_chosen": -1.3984375, + "logps_train/ref_rejected": -1.296875, + "logps_train/rejected": -0.5484811067581177, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2191746085882187, + "rewards_train/margins": -0.29401399940252304, + "rewards_train/rejected": 0.07483939081430435, + "step": 847 + }, + { + "epoch": 0.24, + "logps_train/chosen": -226.050048828125, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -204.04342651367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.205004692077637, + "rewards_train/margins": -1.0506620407104492, + "rewards_train/rejected": -9.154342651367188, + "step": 847 + }, + { + "epoch": 0.24, + "learning_rate": 1.5764923645877431e-06, + "loss": 0.7215, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -196.91802978515625, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -214.42535400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.39180326461792, + "rewards_train/margins": 2.550732135772705, + "rewards_train/rejected": -6.942535400390625, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -96.6320571899414, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -172.9253692626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9632057547569275, + "rewards_train/margins": 6.629331171512604, + "rewards_train/rejected": -7.592536926269531, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -17.444990158081055, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -76.37345123291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0132490396499634, + "rewards_train/margins": 0.624096155166626, + "rewards_train/rejected": -1.6373451948165894, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -105.9942398071289, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -168.23880004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4994239807128906, + "rewards_train/margins": 1.5244560241699219, + "rewards_train/rejected": -3.0238800048828125, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -43.542945861816406, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -51.7996711730957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0042946338653564, + "rewards_train/margins": -1.3743274807929993, + "rewards_train/rejected": -0.6299671530723572, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -68.42091369628906, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -23.34115982055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6920914053916931, + "rewards_train/margins": 0.7420246005058289, + "rewards_train/rejected": -1.434116005897522, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -240.26266479492188, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -189.53976440429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.326266765594482, + "rewards_train/margins": -1.1222901344299316, + "rewards_train/rejected": -6.203976631164551, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -28.2454833984375, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -25.880020141601562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2120484113693237, + "rewards_train/margins": -0.06154632568359375, + "rewards_train/rejected": -1.15050208568573, + "step": 848 + }, + { + "epoch": 0.24, + "logps_train/chosen": -6.644225597381592, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -11.931278228759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10192256420850754, + "rewards_train/margins": 0.33495525270700455, + "rewards_train/rejected": -0.4368778169155121, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -98.00377655029297, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -270.27490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.300377607345581, + "rewards_train/margins": 12.977113008499146, + "rewards_train/rejected": -15.277490615844727, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -157.3793487548828, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -149.72564697265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.637935161590576, + "rewards_train/margins": -0.5653705596923828, + "rewards_train/rejected": -5.072564601898193, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.48692786693573, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -1.380035161972046, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.077869713306427, + "rewards_train/margins": -0.018501773476600647, + "rewards_train/rejected": 0.09637148678302765, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -164.59103393554688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -136.85462951660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3591034412384033, + "rewards_train/margins": 2.626359701156616, + "rewards_train/rejected": -5.9854631423950195, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -2.0802924633026123, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -16.71193504333496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01697075366973877, + "rewards_train/margins": 0.7069142460823059, + "rewards_train/rejected": -0.6899434924125671, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -168.4702606201172, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -138.13340759277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.247026205062866, + "rewards_train/margins": -2.2336853742599487, + "rewards_train/rejected": -1.0133408308029175, + "step": 849 + }, + { + "epoch": 0.24, + "logps_train/chosen": -7.481739044189453, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -6.4895219802856445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2137989103794098, + "rewards_train/margins": 0.0007782876491546631, + "rewards_train/rejected": -0.21457719802856445, + "step": 849 + }, + { + "epoch": 0.24, + "learning_rate": 1.5743286626829435e-06, + "loss": 0.6612, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -19.50736427307129, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -44.84049987792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9819864630699158, + "rewards_train/margins": 1.802063524723053, + "rewards_train/rejected": -2.7840499877929688, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -44.798545837402344, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -16.728017807006836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.11735463142395, + "rewards_train/margins": -0.9726778268814087, + "rewards_train/rejected": -1.1446768045425415, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -269.27056884765625, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -243.05491638183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.627057075500488, + "rewards_train/margins": 0.6284351348876953, + "rewards_train/rejected": -12.255492210388184, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -10.372465133666992, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -32.8305778503418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4747465252876282, + "rewards_train/margins": 0.24581128358840942, + "rewards_train/rejected": -0.7205578088760376, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -186.2086181640625, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -245.0, + "logps_train/rejected": -342.6792907714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.520862102508545, + "rewards_train/margins": 5.247066974639893, + "rewards_train/rejected": -9.767929077148438, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -116.73555755615234, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -114.6844711303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8235557675361633, + "rewards_train/margins": 1.2948912978172302, + "rewards_train/rejected": -2.1184470653533936, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -177.66387939453125, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -222.48931884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.566387951374054, + "rewards_train/margins": 6.882544219493866, + "rewards_train/rejected": -7.44893217086792, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -6.535550117492676, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -14.094423294067383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24105501174926758, + "rewards_train/margins": -0.05036267638206482, + "rewards_train/rejected": -0.19069233536720276, + "step": 850 + }, + { + "epoch": 0.24, + "logps_train/chosen": -59.96792221069336, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -26.670337677001953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8217922449111938, + "rewards_train/margins": -0.4860084056854248, + "rewards_train/rejected": -1.335783839225769, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -160.52963256835938, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -175.178955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8529633283615112, + "rewards_train/margins": 0.9649323225021362, + "rewards_train/rejected": -2.8178956508636475, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.7260329723358154, + "logps_train/ref_chosen": -1.6796875, + "logps_train/ref_rejected": -0.49609375, + "logps_train/rejected": -0.3742581009864807, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.004634547512978315, + "rewards_train/margins": -0.01681811222806573, + "rewards_train/rejected": 0.012183564715087414, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -11.863980293273926, + "logps_train/ref_chosen": -3.328125, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -15.131035804748535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8535855412483215, + "rewards_train/margins": -0.8342319596558809, + "rewards_train/rejected": -0.019353581592440605, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -11.978325843811035, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -39.03916931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5697075724601746, + "rewards_train/margins": 1.034209430217743, + "rewards_train/rejected": -1.6039170026779175, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -89.25437927246094, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -184.45233154296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2254379987716675, + "rewards_train/margins": 1.1197952032089233, + "rewards_train/rejected": -2.345233201980591, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -9.56512451171875, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -36.14674377441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24973754584789276, + "rewards_train/margins": 1.3269119709730148, + "rewards_train/rejected": -1.077174425125122, + "step": 851 + }, + { + "epoch": 0.24, + "logps_train/chosen": -10.787575721740723, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -37.826629638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11625757068395615, + "rewards_train/margins": 1.4789054170250893, + "rewards_train/rejected": -1.5951629877090454, + "step": 851 + }, + { + "epoch": 0.24, + "learning_rate": 1.5721609410864326e-06, + "loss": 0.4766, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.7221126556396484, + "logps_train/ref_chosen": -1.578125, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -19.779481887817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014398765750229359, + "rewards_train/margins": 0.4822994349524379, + "rewards_train/rejected": -0.49669820070266724, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -150.6326446533203, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -193.21255493164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.113264560699463, + "rewards_train/margins": 3.1579909324645996, + "rewards_train/rejected": -8.271255493164062, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -0.879422128200531, + "logps_train/ref_chosen": -0.85546875, + "logps_train/ref_rejected": -1.8984375, + "logps_train/rejected": -2.627485990524292, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.002395337913185358, + "rewards_train/margins": 0.07050950964912772, + "rewards_train/rejected": -0.07290484756231308, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -26.118852615356445, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -36.05485153198242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9493852853775024, + "rewards_train/margins": 0.24359989166259766, + "rewards_train/rejected": -1.1929851770401, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -279.990478515625, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -291.685791015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.79904842376709, + "rewards_train/margins": -1.23046875, + "rewards_train/rejected": -10.56857967376709, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -130.66433715820312, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -236.0788116455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4664337635040283, + "rewards_train/margins": 3.9414474964141846, + "rewards_train/rejected": -6.407881259918213, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.3963111639022827, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -15.544171333312988, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006037366576492786, + "rewards_train/margins": 1.0827548382803798, + "rewards_train/rejected": -1.0887922048568726, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -7.206601142883301, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -82.22126770019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3253476321697235, + "rewards_train/margins": 1.5467791855335236, + "rewards_train/rejected": -1.872126817703247, + "step": 852 + }, + { + "epoch": 0.24, + "logps_train/chosen": -91.38658142089844, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -223.5701904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2886581420898438, + "rewards_train/margins": 7.868361473083496, + "rewards_train/rejected": -9.15701961517334, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -182.05238342285156, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -221.55999755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.055238723754883, + "rewards_train/margins": 3.700760841369629, + "rewards_train/rejected": -11.755999565124512, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -64.85324096679688, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -71.43525695800781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1603240966796875, + "rewards_train/margins": -0.3167984038591385, + "rewards_train/rejected": 0.156474307179451, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -5.382810592651367, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -0.419921875, + "logps_train/rejected": -1.164116382598877, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3421873152256012, + "rewards_train/margins": -0.26776786148548126, + "rewards_train/rejected": -0.07441945374011993, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -174.20248413085938, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -187.3311767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.120248556137085, + "rewards_train/margins": 2.1128690242767334, + "rewards_train/rejected": -5.233117580413818, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -40.88306427001953, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -39.14429473876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2633063793182373, + "rewards_train/margins": 0.6573731899261475, + "rewards_train/rejected": -2.9206795692443848, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -166.87387084960938, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -140.81954956054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7873871326446533, + "rewards_train/margins": -0.50543212890625, + "rewards_train/rejected": -3.2819550037384033, + "step": 853 + }, + { + "epoch": 0.24, + "logps_train/chosen": -16.59954071044922, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -102.08872985839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7724540829658508, + "rewards_train/margins": 2.3864189982414246, + "rewards_train/rejected": -3.1588730812072754, + "step": 853 + }, + { + "epoch": 0.24, + "learning_rate": 1.5699892149699618e-06, + "loss": 0.4419, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -115.20222473144531, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -156.40318298339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8202224969863892, + "rewards_train/margins": 3.520095705986023, + "rewards_train/rejected": -5.340318202972412, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -26.43563461303711, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -28.249603271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.568563461303711, + "rewards_train/margins": 0.475147008895874, + "rewards_train/rejected": -2.043710470199585, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -161.16575622558594, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -139.8398895263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2165756225585938, + "rewards_train/margins": 1.817413330078125, + "rewards_train/rejected": -5.033988952636719, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.8934564590454102, + "logps_train/ref_chosen": -1.640625, + "logps_train/ref_rejected": -1.171875, + "logps_train/rejected": -0.6509377956390381, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.025283146649599075, + "rewards_train/margins": -0.07737686857581139, + "rewards_train/rejected": 0.05209372192621231, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -13.13735294342041, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -2.796875, + "logps_train/rejected": -13.644691467285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.96217280626297, + "rewards_train/margins": 0.12260884046554565, + "rewards_train/rejected": -1.0847816467285156, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -25.946596145629883, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -63.566951751708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3696596324443817, + "rewards_train/margins": 2.7370355427265167, + "rewards_train/rejected": -3.1066951751708984, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -56.2284049987793, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -129.99624633789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07715950161218643, + "rewards_train/margins": 2.926784135401249, + "rewards_train/rejected": -2.8496246337890625, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -152.3536376953125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -150.28268432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7353638410568237, + "rewards_train/margins": 3.1429048776626587, + "rewards_train/rejected": -4.878268718719482, + "step": 854 + }, + { + "epoch": 0.24, + "logps_train/chosen": -8.606383323669434, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -41.993934631347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6668883562088013, + "rewards_train/margins": 1.6950050592422485, + "rewards_train/rejected": -2.36189341545105, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -87.44474792480469, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -108.38458251953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4944747686386108, + "rewards_train/margins": -0.25601649284362793, + "rewards_train/rejected": -1.238458275794983, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -29.6119327545166, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -33.4935302734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5611933469772339, + "rewards_train/margins": -0.7243403196334839, + "rewards_train/rejected": -0.83685302734375, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -38.17501449584961, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -75.93565368652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5925014615058899, + "rewards_train/margins": 1.1510639786720276, + "rewards_train/rejected": -1.7435654401779175, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -7.823145866394043, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -18.933143615722656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22918958961963654, + "rewards_train/margins": -0.048375219106674194, + "rewards_train/rejected": -0.18081437051296234, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -39.32908248901367, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -39.124305725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007908249273896217, + "rewards_train/margins": 2.910772418603301, + "rewards_train/rejected": -2.9186806678771973, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -112.23988342285156, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -42.616554260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1739883422851562, + "rewards_train/margins": 0.5376670360565186, + "rewards_train/rejected": -2.711655378341675, + "step": 855 + }, + { + "epoch": 0.24, + "logps_train/chosen": -14.479851722717285, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -11.600801467895508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7261101603507996, + "rewards_train/margins": -0.18165498971939087, + "rewards_train/rejected": -0.5444551706314087, + "step": 855 + }, + { + "epoch": 0.24, + "learning_rate": 1.5678134995333125e-06, + "loss": 0.4125, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -102.66520690917969, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -63.43803787231445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.691520690917969, + "rewards_train/margins": -0.5852169990539551, + "rewards_train/rejected": -4.106303691864014, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -121.40992736816406, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -166.1708526611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7909927368164062, + "rewards_train/margins": 4.576092720031738, + "rewards_train/rejected": -7.3670854568481445, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -79.69756317138672, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -93.70557403564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7697563171386719, + "rewards_train/margins": 1.8008010387420654, + "rewards_train/rejected": -2.5705573558807373, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -13.861547470092773, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -1.1953125, + "logps_train/rejected": -4.8468146324157715, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.008029818534851, + "rewards_train/margins": -0.6428796052932739, + "rewards_train/rejected": -0.36515021324157715, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -120.412109375, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -154.7660369873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.091210961341858, + "rewards_train/margins": 3.285392642021179, + "rewards_train/rejected": -4.376603603363037, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -190.14166259765625, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -184.92401123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.914166450500488, + "rewards_train/margins": 1.0282349586486816, + "rewards_train/rejected": -7.94240140914917, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -157.5482177734375, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -266.771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.204822063446045, + "rewards_train/margins": 4.772326946258545, + "rewards_train/rejected": -8.97714900970459, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -27.64796257019043, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -1.3984375, + "logps_train/rejected": -9.416648864746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11479625850915909, + "rewards_train/margins": 0.6870249137282372, + "rewards_train/rejected": -0.8018211722373962, + "step": 856 + }, + { + "epoch": 0.24, + "logps_train/chosen": -114.40099334716797, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -164.26502990722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0900993347167969, + "rewards_train/margins": 2.936403751373291, + "rewards_train/rejected": -4.026503086090088, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -170.1398468017578, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -276.78857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.413984775543213, + "rewards_train/margins": 8.664872646331787, + "rewards_train/rejected": -12.078857421875, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -100.69143676757812, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -163.2381591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7191437482833862, + "rewards_train/margins": 5.004672169685364, + "rewards_train/rejected": -6.72381591796875, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.855358123779297, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -38.642738342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06053581461310387, + "rewards_train/margins": 1.8537380434572697, + "rewards_train/rejected": -1.9142738580703735, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -17.363662719726562, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -28.432430267333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5926162600517273, + "rewards_train/margins": 0.1756267547607422, + "rewards_train/rejected": -0.7682430148124695, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -97.88636779785156, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -140.54827880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2886369228363037, + "rewards_train/margins": 2.1161911487579346, + "rewards_train/rejected": -4.404828071594238, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -19.84950065612793, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -25.3249454498291, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.266200065612793, + "rewards_train/margins": 0.8350446224212646, + "rewards_train/rejected": -2.1012446880340576, + "step": 857 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.962044715881348, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -13.464154243469238, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8555794954299927, + "rewards_train/margins": 0.11271095275878906, + "rewards_train/rejected": -0.9682904481887817, + "step": 857 + }, + { + "epoch": 0.24, + "learning_rate": 1.5656338100041855e-06, + "loss": 0.3082, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -17.586091995239258, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -25.33854866027832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0367342233657837, + "rewards_train/margins": -0.2653793692588806, + "rewards_train/rejected": -0.7713548541069031, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -100.07123565673828, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -129.28213500976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3071235716342926, + "rewards_train/margins": 3.3210900723934174, + "rewards_train/rejected": -3.62821364402771, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -5.280364990234375, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -1.84375, + "logps_train/rejected": -16.398868560791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009463501162827015, + "rewards_train/margins": 1.4649754287675023, + "rewards_train/rejected": -1.4555119276046753, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -4.309096336364746, + "logps_train/ref_chosen": -0.875, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -6.059532642364502, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.34340962767601013, + "rewards_train/margins": -0.4155813679099083, + "rewards_train/rejected": 0.07217174023389816, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -80.27900695800781, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -107.30921936035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3720993101596832, + "rewards_train/margins": 0.10302123427391052, + "rewards_train/rejected": 0.2690780758857727, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -40.74228286743164, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -61.715614318847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2242282629013062, + "rewards_train/margins": 0.22233319282531738, + "rewards_train/rejected": -1.4465614557266235, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -163.33865356445312, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -206.65440368652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.833865642547607, + "rewards_train/margins": 3.0315747261047363, + "rewards_train/rejected": -8.865440368652344, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -151.78985595703125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -90.19491577148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7789857387542725, + "rewards_train/margins": -0.8594942092895508, + "rewards_train/rejected": -2.9194915294647217, + "step": 858 + }, + { + "epoch": 0.24, + "logps_train/chosen": -120.9678955078125, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -127.27601623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.596789598464966, + "rewards_train/margins": 0.9808123111724854, + "rewards_train/rejected": -4.577601909637451, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -87.75444793701172, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -91.33990478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.025444794446229935, + "rewards_train/margins": 0.1085456870496273, + "rewards_train/rejected": -0.13399048149585724, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -15.53223991394043, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -20.49798583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.528223991394043, + "rewards_train/margins": 0.8184496164321899, + "rewards_train/rejected": -1.346673607826233, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -74.59600830078125, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -31.79540252685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7096008658409119, + "rewards_train/margins": 1.7730644345283508, + "rewards_train/rejected": -2.4826653003692627, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -102.84713745117188, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -43.123390197753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0347137451171875, + "rewards_train/margins": 1.871375322341919, + "rewards_train/rejected": -2.9060890674591064, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -219.1920623779297, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -233.5130615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.819206237792969, + "rewards_train/margins": 2.6321001052856445, + "rewards_train/rejected": -9.451306343078613, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -2.985927104949951, + "logps_train/ref_chosen": -0.625, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -7.120304107666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2360927164554596, + "rewards_train/margins": 0.08843770623207092, + "rewards_train/rejected": -0.3245304226875305, + "step": 859 + }, + { + "epoch": 0.24, + "logps_train/chosen": -2.0166170597076416, + "logps_train/ref_chosen": -0.6171875, + "logps_train/ref_rejected": -0.6171875, + "logps_train/rejected": -2.014796733856201, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1399429589509964, + "rewards_train/margins": -0.00018203258514404297, + "rewards_train/rejected": -0.13976092636585236, + "step": 859 + }, + { + "epoch": 0.24, + "learning_rate": 1.5634501616380967e-06, + "loss": 0.4704, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -131.82791137695312, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -200.60946655273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2827911376953125, + "rewards_train/margins": 2.6781554222106934, + "rewards_train/rejected": -4.960946559906006, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -154.00106811523438, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -201.68910217285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7001067996025085, + "rewards_train/margins": 6.568803608417511, + "rewards_train/rejected": -7.2689104080200195, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -26.19286346435547, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -33.45146179199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5567863583564758, + "rewards_train/margins": 1.0258598923683167, + "rewards_train/rejected": -1.5826462507247925, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -11.586669921875, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -29.137163162231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7274169921875, + "rewards_train/margins": 0.9425493478775024, + "rewards_train/rejected": -1.6699663400650024, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -116.89195251464844, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -215.36636352539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0391952991485596, + "rewards_train/margins": 8.747441053390503, + "rewards_train/rejected": -9.786636352539062, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -10.550138473510742, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -7.141008377075195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5268888473510742, + "rewards_train/margins": -0.5627880096435547, + "rewards_train/rejected": 0.03589916229248047, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -11.77305793762207, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -3.375, + "logps_train/rejected": -15.576425552368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0437120199203491, + "rewards_train/margins": 0.1764305830001831, + "rewards_train/rejected": -1.2201426029205322, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -100.59532165527344, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -149.51849365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4095321595668793, + "rewards_train/margins": 1.5423171818256378, + "rewards_train/rejected": -1.951849341392517, + "step": 860 + }, + { + "epoch": 0.24, + "logps_train/chosen": -140.26535034179688, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -232.1290283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0265350341796875, + "rewards_train/margins": 8.986368179321289, + "rewards_train/rejected": -10.012903213500977, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.49742317199707, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -25.280513763427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.780992329120636, + "rewards_train/margins": 0.6345590949058533, + "rewards_train/rejected": -1.4155514240264893, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -144.39308166503906, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -167.5637664794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4393081665039062, + "rewards_train/margins": 2.8670687675476074, + "rewards_train/rejected": -6.306376934051514, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -11.442924499511719, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -34.481143951416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25679245591163635, + "rewards_train/margins": 2.035071939229965, + "rewards_train/rejected": -2.2918643951416016, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -19.127023696899414, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -6.541232109069824, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5002023577690125, + "rewards_train/margins": -0.05076664686203003, + "rewards_train/rejected": -0.4494357109069824, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -130.6587371826172, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -185.95872497558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4658737182617188, + "rewards_train/margins": 3.8799986839294434, + "rewards_train/rejected": -7.345872402191162, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -139.1771240234375, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -71.5740737915039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.21771240234375, + "rewards_train/margins": -2.7103049755096436, + "rewards_train/rejected": -1.5074074268341064, + "step": 861 + }, + { + "epoch": 0.24, + "logps_train/chosen": -155.899169921875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -241.31288146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9399170875549316, + "rewards_train/margins": 4.591371059417725, + "rewards_train/rejected": -8.531288146972656, + "step": 861 + }, + { + "epoch": 0.24, + "learning_rate": 1.5612625697182692e-06, + "loss": 0.4155, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -117.92970275878906, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -141.33091735839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7929702997207642, + "rewards_train/margins": 1.0901213884353638, + "rewards_train/rejected": -2.883091688156128, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -2.5233545303344727, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -10.766756057739258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05547704920172691, + "rewards_train/margins": 0.6009026430547237, + "rewards_train/rejected": -0.5454255938529968, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -195.93092346191406, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -188.58273315429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.593092441558838, + "rewards_train/margins": 0.16518115997314453, + "rewards_train/rejected": -5.758273601531982, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -9.036046028137207, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -16.477170944213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3286046087741852, + "rewards_train/margins": 0.5253625214099884, + "rewards_train/rejected": -0.8539671301841736, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.022621154785156, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -23.91061782836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3710121214389801, + "rewards_train/margins": 1.4544247090816498, + "rewards_train/rejected": -1.8254368305206299, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -7.646783351898193, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -5.330785751342773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17282167077064514, + "rewards_train/margins": 0.18402524571865797, + "rewards_train/rejected": -0.011203574948012829, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -96.18144989013672, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -80.73316955566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.618144989013672, + "rewards_train/margins": 0.2551720142364502, + "rewards_train/rejected": -2.873317003250122, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -15.122180938720703, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -35.43658447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5434681177139282, + "rewards_train/margins": 1.3501904010772705, + "rewards_train/rejected": -1.8936585187911987, + "step": 862 + }, + { + "epoch": 0.24, + "logps_train/chosen": -13.235958099365234, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -27.39954376220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4798458218574524, + "rewards_train/margins": 1.1476085782051086, + "rewards_train/rejected": -1.627454400062561, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -40.93756866455078, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -69.25198364257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0062568187713623, + "rewards_train/margins": 2.893941640853882, + "rewards_train/rejected": -4.900198459625244, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -79.6550064086914, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -123.60721588134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0155006647109985, + "rewards_train/margins": 5.245221018791199, + "rewards_train/rejected": -6.260721683502197, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -24.609004974365234, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -23.87638282775879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4296505451202393, + "rewards_train/margins": 0.17986273765563965, + "rewards_train/rejected": -1.609513282775879, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -35.96468734741211, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -11.643295288085938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3214687407016754, + "rewards_train/margins": -0.5383892208337784, + "rewards_train/rejected": 0.21692048013210297, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -146.58847045898438, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -161.350341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7088470458984375, + "rewards_train/margins": 0.7761871814727783, + "rewards_train/rejected": -3.485034227371216, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -118.97550201416016, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -130.28390502929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.147550344467163, + "rewards_train/margins": 2.0808403491973877, + "rewards_train/rejected": -5.228390693664551, + "step": 863 + }, + { + "epoch": 0.24, + "logps_train/chosen": -104.25540161132812, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -149.36831665039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.375540256500244, + "rewards_train/margins": 1.5612916946411133, + "rewards_train/rejected": -4.936831951141357, + "step": 863 + }, + { + "epoch": 0.24, + "learning_rate": 1.5590710495555271e-06, + "loss": 0.378, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -211.34762573242188, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -90.06482696533203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.88476276397705, + "rewards_train/margins": -6.4282801151275635, + "rewards_train/rejected": -2.4564826488494873, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.391992568969727, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -26.974441528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14544926583766937, + "rewards_train/margins": 1.0144949108362198, + "rewards_train/rejected": -1.1599441766738892, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -24.7535457611084, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -9.82737922668457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0566046237945557, + "rewards_train/margins": -0.4629291892051697, + "rewards_train/rejected": -0.593675434589386, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -21.026737213134766, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -31.6594181060791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46517372131347656, + "rewards_train/margins": 1.7382681369781494, + "rewards_train/rejected": -2.203441858291626, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -8.04906177520752, + "logps_train/ref_chosen": -0.8359375, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -13.877676963806152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7213124632835388, + "rewards_train/margins": 0.05083024501800537, + "rewards_train/rejected": -0.7721427083015442, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -14.602104187011719, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -19.565336227416992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6289604306221008, + "rewards_train/margins": 1.1119481921195984, + "rewards_train/rejected": -1.7409086227416992, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -32.78919219970703, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -53.696372985839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.041419267654419, + "rewards_train/margins": 0.12821805477142334, + "rewards_train/rejected": -1.1696373224258423, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -166.98118591308594, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -275.927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2981185913085938, + "rewards_train/margins": 5.394654750823975, + "rewards_train/rejected": -7.692773342132568, + "step": 864 + }, + { + "epoch": 0.24, + "logps_train/chosen": -233.73590087890625, + "logps_train/ref_chosen": -192.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -211.01171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.173590183258057, + "rewards_train/margins": 5.12758207321167, + "rewards_train/rejected": -9.301172256469727, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -38.918304443359375, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -96.60320281982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.40816956758499146, + "rewards_train/margins": 0.31848984956741333, + "rewards_train/rejected": 0.08967971801757812, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -77.99929809570312, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -122.5878677368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9499298334121704, + "rewards_train/margins": 2.2588568925857544, + "rewards_train/rejected": -3.208786725997925, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -35.01801300048828, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -24.327238082885742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1893013715744019, + "rewards_train/margins": 0.6277974843978882, + "rewards_train/rejected": -1.81709885597229, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -137.283203125, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -211.37667846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.628320336341858, + "rewards_train/margins": 8.009347319602966, + "rewards_train/rejected": -9.637667655944824, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -253.09268188476562, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -265.6773681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.809268474578857, + "rewards_train/margins": 2.358468532562256, + "rewards_train/rejected": -9.167737007141113, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -178.65936279296875, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -215.7208251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2659363746643066, + "rewards_train/margins": 5.65614652633667, + "rewards_train/rejected": -8.922082901000977, + "step": 865 + }, + { + "epoch": 0.24, + "logps_train/chosen": -6.630014419555664, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -21.597911834716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1494985669851303, + "rewards_train/margins": 0.815539762377739, + "rewards_train/rejected": -0.6660411953926086, + "step": 865 + }, + { + "epoch": 0.24, + "learning_rate": 1.556875616488188e-06, + "loss": 0.6861, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -2.48256516456604, + "logps_train/ref_chosen": -1.7421875, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -14.773064613342285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07403776794672012, + "rewards_train/margins": 0.7157687172293663, + "rewards_train/rejected": -0.7898064851760864, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -158.27301025390625, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -159.87896728515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2273011207580566, + "rewards_train/margins": -0.03940439224243164, + "rewards_train/rejected": -2.187896728515625, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -26.966102600097656, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -26.62755012512207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9091103076934814, + "rewards_train/margins": -0.3901052474975586, + "rewards_train/rejected": -1.5190050601959229, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -57.20304870605469, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -45.04515838623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8953049182891846, + "rewards_train/margins": 1.1904609203338623, + "rewards_train/rejected": -3.085765838623047, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -13.93149471282959, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -9.415548324584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019350528717041016, + "rewards_train/margins": 0.22653035819530487, + "rewards_train/rejected": -0.20717982947826385, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -206.06826782226562, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -185.62535095214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.806827068328857, + "rewards_train/margins": -1.0442919731140137, + "rewards_train/rejected": -3.7625350952148438, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -93.94209289550781, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -198.90728759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8942092657089233, + "rewards_train/margins": 6.546519875526428, + "rewards_train/rejected": -8.440729141235352, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -8.512653350830078, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -15.54130744934082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12373466789722443, + "rewards_train/margins": 1.0466154366731644, + "rewards_train/rejected": -0.9228807687759399, + "step": 866 + }, + { + "epoch": 0.24, + "logps_train/chosen": -0.7672454118728638, + "logps_train/ref_chosen": -0.435546875, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -4.290285110473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03316985443234444, + "rewards_train/margins": 0.11460865661501884, + "rewards_train/rejected": -0.14777851104736328, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -74.65653228759766, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -106.3486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8656532168388367, + "rewards_train/margins": 0.7192100882530212, + "rewards_train/rejected": -1.584863305091858, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -5.266395568847656, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -6.458795070648193, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.039139557629823685, + "rewards_train/margins": 0.00673995167016983, + "rewards_train/rejected": -0.045879509299993515, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -5.935931205749512, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -4.34775447845459, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4287493824958801, + "rewards_train/margins": -0.49084893614053726, + "rewards_train/rejected": 0.062099553644657135, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -4.31888484954834, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -8.094234466552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08188848942518234, + "rewards_train/margins": 0.3744099512696266, + "rewards_train/rejected": -0.45629844069480896, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -49.61486053466797, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -18.037504196166992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7864860892295837, + "rewards_train/margins": -0.020235657691955566, + "rewards_train/rejected": -0.7662504315376282, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -17.125898361206055, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -37.49470520019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17508983612060547, + "rewards_train/margins": 1.8993806838989258, + "rewards_train/rejected": -2.0744705200195312, + "step": 867 + }, + { + "epoch": 0.24, + "logps_train/chosen": -203.36819458007812, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -193.87747192382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3368194103240967, + "rewards_train/margins": -0.6490721702575684, + "rewards_train/rejected": -1.6877472400665283, + "step": 867 + }, + { + "epoch": 0.24, + "learning_rate": 1.5546762858819557e-06, + "loss": 0.6029, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -21.819080352783203, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -25.74184799194336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8319080471992493, + "rewards_train/margins": -0.15772324800491333, + "rewards_train/rejected": -0.6741847991943359, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -6.212471961975098, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -27.577672958374023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29312220215797424, + "rewards_train/margins": 1.183395117521286, + "rewards_train/rejected": -1.4765173196792603, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -62.839866638183594, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -26.940860748291016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4839866161346436, + "rewards_train/margins": -1.0774005651474, + "rewards_train/rejected": -1.4065860509872437, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -18.64225959777832, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -24.217782974243164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27047595381736755, + "rewards_train/margins": 1.170052319765091, + "rewards_train/rejected": -1.4405282735824585, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -15.325643539428711, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -17.194520950317383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.713814377784729, + "rewards_train/margins": -0.21311229467391968, + "rewards_train/rejected": -0.5007020831108093, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -130.80145263671875, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -183.85708618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.580145239830017, + "rewards_train/margins": 3.8055635690689087, + "rewards_train/rejected": -5.385708808898926, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -149.58294677734375, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -196.95864868164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7582948207855225, + "rewards_train/margins": 1.937570333480835, + "rewards_train/rejected": -5.695865154266357, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -24.67087173461914, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -42.5, + "logps_train/rejected": -61.79756164550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21708717942237854, + "rewards_train/margins": 1.7126689851284027, + "rewards_train/rejected": -1.9297561645507812, + "step": 868 + }, + { + "epoch": 0.24, + "logps_train/chosen": -179.76553344726562, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -201.12506103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.276553630828857, + "rewards_train/margins": 1.735952377319336, + "rewards_train/rejected": -7.012506008148193, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -193.67681884765625, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -170.92945861816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.767682075500488, + "rewards_train/margins": -0.974736213684082, + "rewards_train/rejected": -4.792945861816406, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -269.8782043457031, + "logps_train/ref_chosen": -219.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -237.76358032226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.087820529937744, + "rewards_train/margins": 2.68853759765625, + "rewards_train/rejected": -7.776358127593994, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -118.49250793457031, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -149.64459228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6992509365081787, + "rewards_train/margins": 1.4652082920074463, + "rewards_train/rejected": -4.164459228515625, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -197.90985107421875, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -243.27171325683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.090985059738159, + "rewards_train/margins": 6.336186647415161, + "rewards_train/rejected": -8.42717170715332, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -155.6222686767578, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -208.15965270996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3122270107269287, + "rewards_train/margins": 5.153738260269165, + "rewards_train/rejected": -8.465965270996094, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -8.798605918884277, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -44.80192565917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1267355978488922, + "rewards_train/margins": 1.7409569919109344, + "rewards_train/rejected": -1.8676925897598267, + "step": 869 + }, + { + "epoch": 0.24, + "logps_train/chosen": -19.625633239746094, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -42.003047943115234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7500633597373962, + "rewards_train/margins": -0.549758568406105, + "rewards_train/rejected": -0.2003047913312912, + "step": 869 + }, + { + "epoch": 0.24, + "learning_rate": 1.5524730731298133e-06, + "loss": 0.4197, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -205.6719512939453, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -198.86172485351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.6671953201293945, + "rewards_train/margins": 0.31897735595703125, + "rewards_train/rejected": -5.986172676086426, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.6618660688400269, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -6.965435981750488, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.040405359119176865, + "rewards_train/margins": 0.07176324352622032, + "rewards_train/rejected": -0.11216860264539719, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -199.89523315429688, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -183.3804168701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0895233154296875, + "rewards_train/margins": 3.748518466949463, + "rewards_train/rejected": -7.83804178237915, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -248.1547088623047, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -251.79873657226562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.215470790863037, + "rewards_train/margins": -1.2355971336364746, + "rewards_train/rejected": -5.9798736572265625, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -126.53231048583984, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -192.34927368164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.153231143951416, + "rewards_train/margins": 6.43169641494751, + "rewards_train/rejected": -8.584927558898926, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -170.1187744140625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -171.98977661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.811877489089966, + "rewards_train/margins": 3.4371001720428467, + "rewards_train/rejected": -6.2489776611328125, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -137.5955810546875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -120.11335754394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.209558129310608, + "rewards_train/margins": 0.25177764892578125, + "rewards_train/rejected": -1.4613357782363892, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -5.005959510803223, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -5.138619422912598, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09434594959020615, + "rewards_train/margins": 0.04451598972082138, + "rewards_train/rejected": -0.13886193931102753, + "step": 870 + }, + { + "epoch": 0.24, + "logps_train/chosen": -14.024595260620117, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -1.7734375, + "logps_train/rejected": -3.644824743270874, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4649595320224762, + "rewards_train/margins": -0.27782081067562103, + "rewards_train/rejected": -0.18713872134685516, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -218.23902893066406, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -196.59214782714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.8239030838012695, + "rewards_train/margins": 0.13531160354614258, + "rewards_train/rejected": -4.959214687347412, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -17.392253875732422, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -12.408167839050293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.431412935256958, + "rewards_train/margins": -1.0218461453914642, + "rewards_train/rejected": -0.4095667898654938, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -136.77198791503906, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -178.22586059570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.82719886302948, + "rewards_train/margins": 4.845387101173401, + "rewards_train/rejected": -6.672585964202881, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -134.1455535888672, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -265.7538146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1145553588867188, + "rewards_train/margins": 9.760826110839844, + "rewards_train/rejected": -12.875381469726562, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -7.884030342102051, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -18.609216690063477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48527804017066956, + "rewards_train/margins": 0.722518652677536, + "rewards_train/rejected": -1.2077966928482056, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -15.180578231811523, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -14.85228157043457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27430781722068787, + "rewards_train/margins": 0.6859203279018402, + "rewards_train/rejected": -0.9602281451225281, + "step": 871 + }, + { + "epoch": 0.24, + "logps_train/chosen": -3.8466861248016357, + "logps_train/ref_chosen": -0.41015625, + "logps_train/ref_rejected": -0.41015625, + "logps_train/rejected": -3.7157835960388184, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.34365299344062805, + "rewards_train/margins": -0.013090252876281738, + "rewards_train/rejected": -0.3305627405643463, + "step": 871 + }, + { + "epoch": 0.24, + "learning_rate": 1.550265993651913e-06, + "loss": 0.5194, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.999706268310547, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -52.396026611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2999706268310547, + "rewards_train/margins": 1.5396320819854736, + "rewards_train/rejected": -1.8396027088165283, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -27.885746002197266, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -31.546031951904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3135746121406555, + "rewards_train/margins": 1.241028606891632, + "rewards_train/rejected": -1.5546032190322876, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -115.59869384765625, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -187.64346313476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.109869360923767, + "rewards_train/margins": 4.254477143287659, + "rewards_train/rejected": -5.364346504211426, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -131.64306640625, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -124.139892578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.764306664466858, + "rewards_train/margins": -0.05031740665435791, + "rewards_train/rejected": -1.7139892578125, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -32.59372329711914, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -73.18649291992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.096872329711914, + "rewards_train/margins": 1.696777105331421, + "rewards_train/rejected": -2.793649435043335, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -0.4513842761516571, + "logps_train/ref_chosen": -0.283203125, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -5.214855194091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01681811548769474, + "rewards_train/margins": 0.19685490988194942, + "rewards_train/rejected": -0.21367302536964417, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -151.1182861328125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -156.3856658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.31182861328125, + "rewards_train/margins": 1.8767380714416504, + "rewards_train/rejected": -4.1885666847229, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -93.23224639892578, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -184.23712158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.073224663734436, + "rewards_train/margins": 6.650487780570984, + "rewards_train/rejected": -7.72371244430542, + "step": 872 + }, + { + "epoch": 0.24, + "logps_train/chosen": -14.259201049804688, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -23.237930297851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1837326288223267, + "rewards_train/margins": 0.4056854248046875, + "rewards_train/rejected": -1.5894180536270142, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -1.0482529401779175, + "logps_train/ref_chosen": -0.80078125, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -8.140755653381348, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.024747168645262718, + "rewards_train/margins": -0.023171603330411017, + "rewards_train/rejected": -0.0015755653148517013, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -5.478234767913818, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -0.482421875, + "logps_train/rejected": -4.627663612365723, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19001097977161407, + "rewards_train/margins": 0.2245131880044937, + "rewards_train/rejected": -0.4145241677761078, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.105225563049316, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -24.41815948486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7261475920677185, + "rewards_train/margins": 0.9094184041023254, + "rewards_train/rejected": -1.635565996170044, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -81.28091430664062, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -163.35354614257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02190857008099556, + "rewards_train/margins": 3.8572631366550922, + "rewards_train/rejected": -3.8353545665740967, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -76.18551635742188, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -52.27118682861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6685516238212585, + "rewards_train/margins": 1.7210670113563538, + "rewards_train/rejected": -2.3896186351776123, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -4.010130405426025, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -24.203018188476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12288804352283478, + "rewards_train/margins": 0.7036638110876083, + "rewards_train/rejected": -0.8265518546104431, + "step": 873 + }, + { + "epoch": 0.24, + "logps_train/chosen": -76.82364654541016, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -76.9224853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.41763535141944885, + "rewards_train/margins": 0.009883880615234375, + "rewards_train/rejected": 0.4077514708042145, + "step": 873 + }, + { + "epoch": 0.24, + "learning_rate": 1.5480550628954719e-06, + "loss": 0.3444, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -194.74029541015625, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -194.11563110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.774029731750488, + "rewards_train/margins": 0.6375336647033691, + "rewards_train/rejected": -5.411563396453857, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -90.11862182617188, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -137.794677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3618621826171875, + "rewards_train/margins": 3.367605686187744, + "rewards_train/rejected": -4.729467868804932, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -135.916259765625, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -211.90829467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.591625928878784, + "rewards_train/margins": 5.399203538894653, + "rewards_train/rejected": -7.9908294677734375, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -160.63449096679688, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -193.44468688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.463449001312256, + "rewards_train/margins": 0.5810198783874512, + "rewards_train/rejected": -6.044468879699707, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -34.60459899902344, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -23.525253295898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7854599356651306, + "rewards_train/margins": -0.43293461203575134, + "rewards_train/rejected": -0.3525253236293793, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -12.164617538452148, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -24.20828628540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1977117508649826, + "rewards_train/margins": 0.7293669134378433, + "rewards_train/rejected": -0.9270786643028259, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -33.29872131347656, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -19.28209114074707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.00487220287323, + "rewards_train/margins": 0.5936493873596191, + "rewards_train/rejected": -1.5985215902328491, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -189.932373046875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -181.13998413085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.893237590789795, + "rewards_train/margins": 2.3707613945007324, + "rewards_train/rejected": -8.263998985290527, + "step": 874 + }, + { + "epoch": 0.24, + "logps_train/chosen": -131.79505920410156, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -120.47434997558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1295058727264404, + "rewards_train/margins": -2.4320708513259888, + "rewards_train/rejected": -0.6974350214004517, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -135.74188232421875, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -129.6588897705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9741883277893066, + "rewards_train/margins": 0.5917007923126221, + "rewards_train/rejected": -3.5658891201019287, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -20.886531829833984, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -40.15190124511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8511531949043274, + "rewards_train/margins": 1.2015370726585388, + "rewards_train/rejected": -2.052690267562866, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -17.6624755859375, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -39.47966766357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.859997570514679, + "rewards_train/margins": 1.3379692435264587, + "rewards_train/rejected": -2.1979668140411377, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -35.26648712158203, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -43.85780715942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1641486883163452, + "rewards_train/margins": 1.4966319799423218, + "rewards_train/rejected": -2.660780668258667, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -110.54635620117188, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -144.09762573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3546355962753296, + "rewards_train/margins": 5.355126976966858, + "rewards_train/rejected": -6.7097625732421875, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -22.318710327148438, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -43.60594177246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7318710684776306, + "rewards_train/margins": 1.4662230610847473, + "rewards_train/rejected": -2.198094129562378, + "step": 875 + }, + { + "epoch": 0.24, + "logps_train/chosen": -33.27529525756836, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -2.140625, + "logps_train/rejected": -16.450679779052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6900295615196228, + "rewards_train/margins": 0.7409759163856506, + "rewards_train/rejected": -1.4310054779052734, + "step": 875 + }, + { + "epoch": 0.24, + "learning_rate": 1.5458402963346608e-06, + "loss": 0.4386, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -18.442190170288086, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -39.36066436767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1067190170288086, + "rewards_train/margins": 1.8668473958969116, + "rewards_train/rejected": -1.9735664129257202, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -18.976356506347656, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -15.842985153198242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1601356267929077, + "rewards_train/margins": 0.08197546005249023, + "rewards_train/rejected": -1.242111086845398, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -15.082664489746094, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -19.324874877929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9895164370536804, + "rewards_train/margins": -0.33202892541885376, + "rewards_train/rejected": -0.6574875116348267, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -0.2375468909740448, + "logps_train/ref_chosen": -0.66796875, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -4.320155620574951, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04304218664765358, + "rewards_train/margins": 0.050057748798280954, + "rewards_train/rejected": -0.007015562150627375, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -15.438880920410156, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -29.00638198852539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20638810098171234, + "rewards_train/margins": 0.7317500859498978, + "rewards_train/rejected": -0.9381381869316101, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -181.34146118164062, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -181.73333740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5341460704803467, + "rewards_train/margins": 0.03918766975402832, + "rewards_train/rejected": -2.573333740234375, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -8.568432807922363, + "logps_train/ref_chosen": -1.5703125, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -22.10550308227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6998120546340942, + "rewards_train/margins": 0.9263632297515869, + "rewards_train/rejected": -1.6261752843856812, + "step": 876 + }, + { + "epoch": 0.24, + "logps_train/chosen": -59.26365280151367, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -72.32576751708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.676365375518799, + "rewards_train/margins": 1.5937113761901855, + "rewards_train/rejected": -4.270076751708984, + "step": 876 + }, + { + "epoch": 0.25, + "logps_train/chosen": -240.0926513671875, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -231.83343505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.80926513671875, + "rewards_train/margins": 0.37407827377319336, + "rewards_train/rejected": -4.183343410491943, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -41.18003845214844, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -46.9237174987793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3305038213729858, + "rewards_train/margins": -0.21313202381134033, + "rewards_train/rejected": -1.1173717975616455, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -19.01185417175293, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -37.55058670043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.907435417175293, + "rewards_train/margins": 0.33512330055236816, + "rewards_train/rejected": -1.2425587177276611, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -187.97695922851562, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -228.59637451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9976959228515625, + "rewards_train/margins": 8.461941719055176, + "rewards_train/rejected": -11.459637641906738, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -8.026836395263672, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -23.216121673583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4479961395263672, + "rewards_train/margins": 0.04861602187156677, + "rewards_train/rejected": -0.49661216139793396, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -152.65301513671875, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -170.695556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8153016567230225, + "rewards_train/margins": 0.6042540073394775, + "rewards_train/rejected": -4.4195556640625, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -193.55670166015625, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -221.42706298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.005670547485352, + "rewards_train/margins": 1.5870361328125, + "rewards_train/rejected": -9.592706680297852, + "step": 877 + }, + { + "epoch": 0.25, + "logps_train/chosen": -95.56058502197266, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -156.42120361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7060585021972656, + "rewards_train/margins": 3.6360621452331543, + "rewards_train/rejected": -4.34212064743042, + "step": 877 + }, + { + "epoch": 0.25, + "learning_rate": 1.5436217094704976e-06, + "loss": 0.4443, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -207.10519409179688, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -171.05709838867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5105194449424744, + "rewards_train/margins": 1.5951903462409973, + "rewards_train/rejected": -2.1057097911834717, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -23.598154067993164, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -4.997105598449707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2473154067993164, + "rewards_train/margins": -0.24447984690777957, + "rewards_train/rejected": -0.002835559891536832, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -8.516931533813477, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -20.061403274536133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17356815934181213, + "rewards_train/margins": 0.9075722396373749, + "rewards_train/rejected": -1.081140398979187, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -38.82806396484375, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -34.17024230957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.982806384563446, + "rewards_train/margins": 1.987342894077301, + "rewards_train/rejected": -2.970149278640747, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -25.940265655517578, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -32.90178298950195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7440266013145447, + "rewards_train/margins": 0.7836517691612244, + "rewards_train/rejected": -1.527678370475769, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -72.58784484863281, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -110.59319305419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3587844967842102, + "rewards_train/margins": 1.9505347609519958, + "rewards_train/rejected": -2.309319257736206, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -20.538299560546875, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -31.35077667236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7225799560546875, + "rewards_train/margins": 0.6874977350234985, + "rewards_train/rejected": -1.410077691078186, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -83.43256378173828, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -133.77037048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0932563543319702, + "rewards_train/margins": 3.433780789375305, + "rewards_train/rejected": -4.527037143707275, + "step": 878 + }, + { + "epoch": 0.25, + "logps_train/chosen": -22.037113189697266, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -15.274105072021484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3474613428115845, + "rewards_train/margins": -0.27317583560943604, + "rewards_train/rejected": -1.0742855072021484, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -101.35916900634766, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -119.20976257324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8359169363975525, + "rewards_train/margins": 1.2850592732429504, + "rewards_train/rejected": -2.120976209640503, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -30.437881469726562, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -125.02816009521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6312881708145142, + "rewards_train/margins": 3.471527934074402, + "rewards_train/rejected": -4.102816104888916, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -5.817441940307617, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -26.421356201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3270567059516907, + "rewards_train/margins": 0.7400789856910706, + "rewards_train/rejected": -1.0671356916427612, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -181.50584411621094, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -232.52542114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.750584363937378, + "rewards_train/margins": 7.401957750320435, + "rewards_train/rejected": -11.152542114257812, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -90.57179260253906, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -172.82191467285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.05717933177948, + "rewards_train/margins": 6.625012040138245, + "rewards_train/rejected": -7.682191371917725, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -1.5782760381698608, + "logps_train/ref_chosen": -1.203125, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -11.054540634155273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.037515103816986084, + "rewards_train/margins": 0.47418898344039917, + "rewards_train/rejected": -0.5117040872573853, + "step": 879 + }, + { + "epoch": 0.25, + "logps_train/chosen": -32.13905334472656, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -19.98671531677246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7514053583145142, + "rewards_train/margins": 0.6878912448883057, + "rewards_train/rejected": -1.4392966032028198, + "step": 879 + }, + { + "epoch": 0.25, + "learning_rate": 1.5413993178307379e-06, + "loss": 0.3015, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -12.753227233886719, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -6.123037338256836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18157272040843964, + "rewards_train/margins": -0.11301898211240768, + "rewards_train/rejected": -0.06855373829603195, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -6.627520561218262, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -7.2247209548950195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3080645501613617, + "rewards_train/margins": -0.28246745467185974, + "rewards_train/rejected": -0.025597095489501953, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -168.25323486328125, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -287.244384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.275323390960693, + "rewards_train/margins": 7.5491156578063965, + "rewards_train/rejected": -13.82443904876709, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -38.77286911010742, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -25.608623504638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1647869348526, + "rewards_train/margins": 0.09607541561126709, + "rewards_train/rejected": -1.2608623504638672, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -129.67550659179688, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -225.826904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2675507068634033, + "rewards_train/margins": 5.91513991355896, + "rewards_train/rejected": -8.182690620422363, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -132.49098205566406, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -75.85150909423828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.349098205566406, + "rewards_train/margins": -1.913947343826294, + "rewards_train/rejected": -2.4351508617401123, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -164.84024047851562, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -161.58340454101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6840240359306335, + "rewards_train/margins": -0.025683581829071045, + "rewards_train/rejected": -0.6583404541015625, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -3.2155213356018066, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -7.986570358276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.001885366509668529, + "rewards_train/margins": 0.1974174053175375, + "rewards_train/rejected": -0.19553203880786896, + "step": 880 + }, + { + "epoch": 0.25, + "logps_train/chosen": -19.985631942749023, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -15.78026294708252, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6079381704330444, + "rewards_train/margins": -0.3970993757247925, + "rewards_train/rejected": -1.210838794708252, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -144.32302856445312, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -106.87150573730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.932302951812744, + "rewards_train/margins": 0.00484776496887207, + "rewards_train/rejected": -2.937150716781616, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -72.01570129394531, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -71.92059326171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.801570177078247, + "rewards_train/margins": -0.00951087474822998, + "rewards_train/rejected": -1.792059302330017, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -6.507901191711426, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -15.731945991516113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3601651191711426, + "rewards_train/margins": 0.17552947998046875, + "rewards_train/rejected": -0.5356945991516113, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -1.5744035243988037, + "logps_train/ref_chosen": -2.078125, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -3.9127421379089355, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05037214979529381, + "rewards_train/margins": -0.011478636413812637, + "rewards_train/rejected": 0.061850786209106445, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -71.7253646850586, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -101.18636322021484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1475365161895752, + "rewards_train/margins": 2.5710997581481934, + "rewards_train/rejected": -3.7186362743377686, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -19.497093200683594, + "logps_train/ref_chosen": -1.5234375, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -22.850505828857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7973655462265015, + "rewards_train/margins": 0.13612258434295654, + "rewards_train/rejected": -1.933488130569458, + "step": 881 + }, + { + "epoch": 0.25, + "logps_train/chosen": -14.331207275390625, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -22.92392349243164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7174957394599915, + "rewards_train/margins": -0.17510336637496948, + "rewards_train/rejected": -0.542392373085022, + "step": 881 + }, + { + "epoch": 0.25, + "learning_rate": 1.5391731369697674e-06, + "loss": 0.6686, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -35.68394088745117, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -15.174736022949219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.193394184112549, + "rewards_train/margins": -1.169670581817627, + "rewards_train/rejected": -1.0237236022949219, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -56.999629974365234, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -56.99571990966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2249629944562912, + "rewards_train/margins": -0.0003910064697265625, + "rewards_train/rejected": -0.22457198798656464, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -148.18898010253906, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -162.70018005371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6188980340957642, + "rewards_train/margins": 2.1511200666427612, + "rewards_train/rejected": -2.7700181007385254, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -166.47662353515625, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -77.31591796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8476624488830566, + "rewards_train/margins": -0.816070556640625, + "rewards_train/rejected": -2.0315918922424316, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -306.480224609375, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -225.7721710205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.8480224609375, + "rewards_train/margins": -1.820805549621582, + "rewards_train/rejected": -11.027216911315918, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -136.4250946044922, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -171.14324951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.692509412765503, + "rewards_train/margins": 3.471815824508667, + "rewards_train/rejected": -7.16432523727417, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -238.3443145751953, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -239.64889526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.534431457519531, + "rewards_train/margins": 0.13045835494995117, + "rewards_train/rejected": -5.664889812469482, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -122.96913146972656, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -151.18170166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3469131588935852, + "rewards_train/margins": 5.521257102489471, + "rewards_train/rejected": -5.868170261383057, + "step": 882 + }, + { + "epoch": 0.25, + "logps_train/chosen": -0.48290225863456726, + "logps_train/ref_chosen": -0.447265625, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -3.6954867839813232, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0035636634565889835, + "rewards_train/margins": 0.13161001494154334, + "rewards_train/rejected": -0.13517367839813232, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -23.393787384033203, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -48.73250961303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6518787741661072, + "rewards_train/margins": 0.6963722109794617, + "rewards_train/rejected": -1.3482509851455688, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -228.64395141601562, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -27.080150604248047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.464395046234131, + "rewards_train/margins": -6.806379973888397, + "rewards_train/rejected": -0.6580150723457336, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -124.27375030517578, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -139.82351684570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7273750305175781, + "rewards_train/margins": 4.104976654052734, + "rewards_train/rejected": -5.8323516845703125, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -8.23652458190918, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -17.776948928833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0013475418090820312, + "rewards_train/margins": 0.016542434692382812, + "rewards_train/rejected": -0.015194892883300781, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -85.51483154296875, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -132.29164123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4514831602573395, + "rewards_train/margins": 2.077680915594101, + "rewards_train/rejected": -2.5291640758514404, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -63.944278717041016, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -48.87284469604492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0444278717041016, + "rewards_train/margins": 0.2678565979003906, + "rewards_train/rejected": -1.3122844696044922, + "step": 883 + }, + { + "epoch": 0.25, + "logps_train/chosen": -117.59856414794922, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -93.55747985839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4598565101623535, + "rewards_train/margins": -0.8041085004806519, + "rewards_train/rejected": -1.6557480096817017, + "step": 883 + }, + { + "epoch": 0.25, + "learning_rate": 1.5369431824684914e-06, + "loss": 1.0259, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -186.35244750976562, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -203.78561401367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.135244846343994, + "rewards_train/margins": 3.44331693649292, + "rewards_train/rejected": -9.578561782836914, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -9.69422721862793, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -9.983297348022461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07567272335290909, + "rewards_train/margins": 0.42265700548887253, + "rewards_train/rejected": -0.4983297288417816, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -251.3876953125, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -268.8617858886719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.338769912719727, + "rewards_train/margins": -1.2525911331176758, + "rewards_train/rejected": -8.08617877960205, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -19.099023818969727, + "logps_train/ref_chosen": -3.328125, + "logps_train/ref_rejected": -2.921875, + "logps_train/rejected": -19.214412689208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5770899057388306, + "rewards_train/margins": 0.05216383934020996, + "rewards_train/rejected": -1.6292537450790405, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -49.22633361816406, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -96.43130493164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24763336777687073, + "rewards_train/margins": 2.1454971730709076, + "rewards_train/rejected": -2.3931305408477783, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.2887439727783203, + "logps_train/ref_chosen": -0.494140625, + "logps_train/ref_rejected": -0.494140625, + "logps_train/rejected": -2.3394758701324463, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1794603317975998, + "rewards_train/margins": 0.005073189735412598, + "rewards_train/rejected": -0.1845335215330124, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -197.22122192382812, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -327.0504455566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.122122287750244, + "rewards_train/margins": 11.082922458648682, + "rewards_train/rejected": -15.205044746398926, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -14.504402160644531, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -10.984477996826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6379402279853821, + "rewards_train/margins": 0.33863258361816406, + "rewards_train/rejected": -0.9765728116035461, + "step": 884 + }, + { + "epoch": 0.25, + "logps_train/chosen": -5.853455543518066, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -21.24681854248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2665955722332001, + "rewards_train/margins": 0.8768363296985626, + "rewards_train/rejected": -1.1434319019317627, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -90.29420471191406, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -69.50067901611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3294204771518707, + "rewards_train/margins": 1.2956474721431732, + "rewards_train/rejected": -1.625067949295044, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -55.61274719238281, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -41.89105987548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08627472072839737, + "rewards_train/margins": 2.9715813621878624, + "rewards_train/rejected": -3.0578560829162598, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -25.319730758666992, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -19.484079360961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2319730818271637, + "rewards_train/margins": 0.9508099257946014, + "rewards_train/rejected": -1.1827830076217651, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -11.6893892288208, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -15.998493194580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.828313946723938, + "rewards_train/margins": 0.44809794425964355, + "rewards_train/rejected": -1.2764118909835815, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -206.49415588378906, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -208.3955078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.949415683746338, + "rewards_train/margins": -0.4098649024963379, + "rewards_train/rejected": -5.53955078125, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -13.471284866333008, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -22.96197509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4346284866333008, + "rewards_train/margins": 0.86781907081604, + "rewards_train/rejected": -1.3024475574493408, + "step": 885 + }, + { + "epoch": 0.25, + "logps_train/chosen": -271.750244140625, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -266.26959228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.575024604797363, + "rewards_train/margins": 0.5519351959228516, + "rewards_train/rejected": -13.126959800720215, + "step": 885 + }, + { + "epoch": 0.25, + "learning_rate": 1.534709469934227e-06, + "loss": 0.4519, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -133.96002197265625, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -181.58615112304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.246002197265625, + "rewards_train/margins": 5.112613201141357, + "rewards_train/rejected": -6.358615398406982, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -29.381511688232422, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -34.640167236328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.025651216506958, + "rewards_train/margins": -0.11788451671600342, + "rewards_train/rejected": -1.9077666997909546, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -200.38763427734375, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -171.27659606933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.488763332366943, + "rewards_train/margins": -0.8111038208007812, + "rewards_train/rejected": -6.677659511566162, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -137.8165740966797, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -212.8511962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0316574573516846, + "rewards_train/margins": 7.453462362289429, + "rewards_train/rejected": -9.485119819641113, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -21.733837127685547, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -38.22539520263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1796337366104126, + "rewards_train/margins": 0.7054058313369751, + "rewards_train/rejected": -1.8850395679473877, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -4.725667953491211, + "logps_train/ref_chosen": -3.375, + "logps_train/ref_rejected": -2.140625, + "logps_train/rejected": -8.314945220947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13506679236888885, + "rewards_train/margins": 0.4823652654886246, + "rewards_train/rejected": -0.6174320578575134, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.33785080909729, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -0.97265625, + "logps_train/rejected": -1.1691592931747437, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011527419090270996, + "rewards_train/margins": 0.03117772378027439, + "rewards_train/rejected": -0.019650304690003395, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.03921890258789, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -33.54609680175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07892189174890518, + "rewards_train/margins": 2.0819379314780235, + "rewards_train/rejected": -2.1608598232269287, + "step": 886 + }, + { + "epoch": 0.25, + "logps_train/chosen": -20.545778274536133, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -18.17502212524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7608278393745422, + "rewards_train/margins": 0.5847993493080139, + "rewards_train/rejected": -1.3456271886825562, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -108.58460235595703, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -125.14376831054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3584602475166321, + "rewards_train/margins": 2.9059166312217712, + "rewards_train/rejected": -3.2643768787384033, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -1.7695142030715942, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -26.903413772583008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11367358267307281, + "rewards_train/margins": 1.1165149360895157, + "rewards_train/rejected": -1.0028413534164429, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -223.86180114746094, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -191.90603637695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.736180305480957, + "rewards_train/margins": -2.345576763153076, + "rewards_train/rejected": -7.390603542327881, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -108.56230163574219, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -166.14537048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9062302112579346, + "rewards_train/margins": 3.5083067417144775, + "rewards_train/rejected": -5.414536952972412, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -87.64190673828125, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -111.75177001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2141907215118408, + "rewards_train/margins": 0.760986328125, + "rewards_train/rejected": -1.9751770496368408, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -157.87066650390625, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -232.40516662597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.53706693649292, + "rewards_train/margins": 0.8034496307373047, + "rewards_train/rejected": -6.340516567230225, + "step": 887 + }, + { + "epoch": 0.25, + "logps_train/chosen": -93.7237319946289, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -24.644901275634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3723732233047485, + "rewards_train/margins": 0.15461695194244385, + "rewards_train/rejected": -1.5269901752471924, + "step": 887 + }, + { + "epoch": 0.25, + "learning_rate": 1.5324720150005942e-06, + "loss": 0.515, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -20.478694915771484, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -134.47177124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2853695154190063, + "rewards_train/margins": 1.7118076086044312, + "rewards_train/rejected": -2.9971771240234375, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -191.9891357421875, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -193.42283630371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.501086413860321, + "rewards_train/margins": 2.1433700919151306, + "rewards_train/rejected": -1.6422836780548096, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -29.421310424804688, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -16.673049926757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9421310424804688, + "rewards_train/margins": 0.09392392635345459, + "rewards_train/rejected": -1.0360549688339233, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -61.625526428222656, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -9.322240829467773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33755263686180115, + "rewards_train/margins": 0.2868589460849762, + "rewards_train/rejected": -0.6244115829467773, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -134.76275634765625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -187.28164672851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.976275682449341, + "rewards_train/margins": 4.751888990402222, + "rewards_train/rejected": -7.7281646728515625, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -188.55616760253906, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -216.71890258789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7556169033050537, + "rewards_train/margins": 2.8162734508514404, + "rewards_train/rejected": -6.571890354156494, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.4396939277648926, + "logps_train/ref_chosen": -1.34375, + "logps_train/ref_rejected": -1.09375, + "logps_train/rejected": -1.7608977556228638, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10959439724683762, + "rewards_train/margins": -0.042879618704319, + "rewards_train/rejected": -0.06671477854251862, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -33.23394775390625, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -27.05537986755371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24839477241039276, + "rewards_train/margins": 1.1133932620286942, + "rewards_train/rejected": -1.361788034439087, + "step": 888 + }, + { + "epoch": 0.25, + "logps_train/chosen": -68.40298461914062, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -57.643951416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7652984857559204, + "rewards_train/margins": 1.6990967988967896, + "rewards_train/rejected": -2.46439528465271, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -3.1163694858551025, + "logps_train/ref_chosen": -0.8046875, + "logps_train/ref_rejected": -0.8046875, + "logps_train/rejected": -3.1409530639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23116819560527802, + "rewards_train/margins": 0.0024583637714385986, + "rewards_train/rejected": -0.2336265593767166, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -8.537457466125488, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -14.114744186401367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36312076449394226, + "rewards_train/margins": 0.46397867798805237, + "rewards_train/rejected": -0.8270994424819946, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.8250010013580322, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -0.97265625, + "logps_train/rejected": -1.2643344402313232, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020000100135803223, + "rewards_train/margins": 0.009167719632387161, + "rewards_train/rejected": -0.029167819768190384, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -3.3553757667541504, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -1.765625, + "logps_train/rejected": -5.63796854019165, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11991257965564728, + "rewards_train/margins": 0.26732178032398224, + "rewards_train/rejected": -0.3872343599796295, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -121.52738189697266, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -222.30406188964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6527382135391235, + "rewards_train/margins": 9.277668356895447, + "rewards_train/rejected": -10.93040657043457, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.941776275634766, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -15.584758758544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2004276514053345, + "rewards_train/margins": -0.39820176362991333, + "rewards_train/rejected": -0.8022258877754211, + "step": 889 + }, + { + "epoch": 0.25, + "logps_train/chosen": -78.9581298828125, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -78.9651870727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4958129823207855, + "rewards_train/margins": 0.000705718994140625, + "rewards_train/rejected": -0.49651870131492615, + "step": 889 + }, + { + "epoch": 0.25, + "learning_rate": 1.5302308333274048e-06, + "loss": 0.4224, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -6.082213878631592, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -26.462879180908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24572138488292694, + "rewards_train/margins": 1.6349415332078934, + "rewards_train/rejected": -1.8806629180908203, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -93.47784423828125, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -116.68759155273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7477844953536987, + "rewards_train/margins": 1.670974612236023, + "rewards_train/rejected": -3.4187591075897217, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -120.6605224609375, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -152.24147033691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8660523891448975, + "rewards_train/margins": 4.208094835281372, + "rewards_train/rejected": -7.0741472244262695, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -172.38076782226562, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -175.712646484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.1380767822265625, + "rewards_train/margins": -1.2668120861053467, + "rewards_train/rejected": -3.871264696121216, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -176.45693969726562, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -211.4684295654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.345694065093994, + "rewards_train/margins": 4.0011491775512695, + "rewards_train/rejected": -6.346843242645264, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -21.366844177246094, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -22.119321823120117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19918441772460938, + "rewards_train/margins": 1.375247836112976, + "rewards_train/rejected": -1.5744322538375854, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -20.072219848632812, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -38.91303253173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16972199082374573, + "rewards_train/margins": 1.3840813338756561, + "rewards_train/rejected": -1.5538033246994019, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -147.6372528076172, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -117.54686737060547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.063725233078003, + "rewards_train/margins": -0.30903851985931396, + "rewards_train/rejected": -1.754686713218689, + "step": 890 + }, + { + "epoch": 0.25, + "logps_train/chosen": -156.05624389648438, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -173.70562744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4056243896484375, + "rewards_train/margins": 2.114938259124756, + "rewards_train/rejected": -6.520562648773193, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -30.249834060668945, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -12.849836349487305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.37501659989356995, + "rewards_train/margins": 1.2600002586841583, + "rewards_train/rejected": -0.8849836587905884, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -125.18608093261719, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -121.87955474853516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1186081171035767, + "rewards_train/margins": -0.6806526482105255, + "rewards_train/rejected": -0.43795546889305115, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -132.42877197265625, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -209.53909301757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2428772449493408, + "rewards_train/margins": 6.711032152175903, + "rewards_train/rejected": -7.953909397125244, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -23.24526023864746, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -53.340187072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19952602684497833, + "rewards_train/margins": 2.3594928234815598, + "rewards_train/rejected": -2.559018850326538, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -83.54191589355469, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -109.43075561523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15419159829616547, + "rewards_train/margins": 0.888883963227272, + "rewards_train/rejected": -1.0430755615234375, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -98.23178100585938, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -115.35226440429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12317810207605362, + "rewards_train/margins": 0.21204834431409836, + "rewards_train/rejected": -0.335226446390152, + "step": 891 + }, + { + "epoch": 0.25, + "logps_train/chosen": -65.31990051269531, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -108.42799377441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11800994724035263, + "rewards_train/margins": 1.6108093485236168, + "rewards_train/rejected": -1.4927994012832642, + "step": 891 + }, + { + "epoch": 0.25, + "learning_rate": 1.5279859406005545e-06, + "loss": 0.367, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -9.347084999084473, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -28.735261917114258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15279150009155273, + "rewards_train/margins": 1.5200676918029785, + "rewards_train/rejected": -1.3672761917114258, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -15.393824577331543, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -32.13133239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2268824577331543, + "rewards_train/margins": 0.8362507820129395, + "rewards_train/rejected": -1.0631332397460938, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -3.206397771835327, + "logps_train/ref_chosen": -1.53125, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -4.785812854766846, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16751478612422943, + "rewards_train/margins": 0.03606650233268738, + "rewards_train/rejected": -0.2035812884569168, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -17.920360565185547, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -27.52852439880371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9857860803604126, + "rewards_train/margins": 1.1451913118362427, + "rewards_train/rejected": -2.1309773921966553, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -177.34234619140625, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -255.97555541992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.834234595298767, + "rewards_train/margins": 6.463321328163147, + "rewards_train/rejected": -8.297555923461914, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -26.35907554626465, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -45.22510528564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6484075784683228, + "rewards_train/margins": 1.6991029977798462, + "rewards_train/rejected": -2.347510576248169, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.005836486816406, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -26.247848510742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1724586486816406, + "rewards_train/margins": 0.6960762739181519, + "rewards_train/rejected": -1.8685349225997925, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -33.31322479248047, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -87.03241729736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2813224792480469, + "rewards_train/margins": 1.296919345855713, + "rewards_train/rejected": -2.5782418251037598, + "step": 892 + }, + { + "epoch": 0.25, + "logps_train/chosen": -104.19914245605469, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -140.89276123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6699142456054688, + "rewards_train/margins": 1.2193620204925537, + "rewards_train/rejected": -2.8892762660980225, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -195.39297485351562, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -216.49404907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.489297389984131, + "rewards_train/margins": 0.6601080894470215, + "rewards_train/rejected": -8.149405479431152, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -45.96845626831055, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -107.7872314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8093456029891968, + "rewards_train/margins": 2.8693777322769165, + "rewards_train/rejected": -4.678723335266113, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -131.72848510742188, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -113.0238037109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4228485822677612, + "rewards_train/margins": -0.17046821117401123, + "rewards_train/rejected": -1.25238037109375, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -20.207609176635742, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -19.904430389404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7645109295845032, + "rewards_train/margins": 0.4009321331977844, + "rewards_train/rejected": -1.1654430627822876, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -78.7033920288086, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -126.08068084716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0796607956290245, + "rewards_train/margins": 0.3377288803458214, + "rewards_train/rejected": -0.2580680847167969, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -249.93775939941406, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -249.91355895996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.493776321411133, + "rewards_train/margins": 1.6975793838500977, + "rewards_train/rejected": -13.19135570526123, + "step": 893 + }, + { + "epoch": 0.25, + "logps_train/chosen": -83.10331726074219, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -91.3605728149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.210331678390503, + "rewards_train/margins": 0.6757256984710693, + "rewards_train/rejected": -2.8860573768615723, + "step": 893 + }, + { + "epoch": 0.25, + "learning_rate": 1.5257373525319124e-06, + "loss": 0.3417, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -56.23374938964844, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -112.0163803100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.610874891281128, + "rewards_train/margins": 0.6407632827758789, + "rewards_train/rejected": -3.251638174057007, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -37.076255798339844, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -78.48123931884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.44237443804740906, + "rewards_train/margins": 0.4904983714222908, + "rewards_train/rejected": -0.048123933374881744, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -98.03325653076172, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -196.2975311279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9033256769180298, + "rewards_train/margins": 6.676427721977234, + "rewards_train/rejected": -7.579753398895264, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -98.16798400878906, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -130.4518280029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7167985439300537, + "rewards_train/margins": 3.2783844470977783, + "rewards_train/rejected": -5.995182991027832, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -194.71066284179688, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -230.80075073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.071066379547119, + "rewards_train/margins": 4.309008598327637, + "rewards_train/rejected": -6.380074977874756, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -3.0814926624298096, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -9.45793628692627, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1065867692232132, + "rewards_train/margins": 0.42358188331127167, + "rewards_train/rejected": -0.5301686525344849, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -157.4776611328125, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -208.94810485839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.597766160964966, + "rewards_train/margins": 6.497044324874878, + "rewards_train/rejected": -10.094810485839844, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -110.96517944335938, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -158.3996124267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.046518087387085, + "rewards_train/margins": 2.543443441390991, + "rewards_train/rejected": -4.589961528778076, + "step": 894 + }, + { + "epoch": 0.25, + "logps_train/chosen": -138.82156372070312, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -161.3917236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9821563959121704, + "rewards_train/margins": 3.2570160627365112, + "rewards_train/rejected": -4.239172458648682, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -7.482151031494141, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -18.65777015686035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10134010761976242, + "rewards_train/margins": 1.0925619080662727, + "rewards_train/rejected": -1.1939020156860352, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.832460403442383, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -29.863323211669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40199604630470276, + "rewards_train/margins": 0.7218362987041473, + "rewards_train/rejected": -1.12383234500885, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -60.36011505126953, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -32.05854034423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7860115170478821, + "rewards_train/margins": 0.09484255313873291, + "rewards_train/rejected": -0.880854070186615, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -43.633705139160156, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -36.31912612915039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48837050795555115, + "rewards_train/margins": 1.1560421288013458, + "rewards_train/rejected": -1.644412636756897, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -187.77706909179688, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -191.91053771972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.877707004547119, + "rewards_train/margins": 5.613346576690674, + "rewards_train/rejected": -8.491053581237793, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -39.02963638305664, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -14.7687406539917, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9779636263847351, + "rewards_train/margins": 0.22391051054000854, + "rewards_train/rejected": -1.2018741369247437, + "step": 895 + }, + { + "epoch": 0.25, + "logps_train/chosen": -32.136314392089844, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -30.61493682861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6636314392089844, + "rewards_train/margins": 1.5416123867034912, + "rewards_train/rejected": -2.2052438259124756, + "step": 895 + }, + { + "epoch": 0.25, + "learning_rate": 1.5234850848592107e-06, + "loss": 0.2478, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.4265382289886475, + "logps_train/ref_chosen": -1.1328125, + "logps_train/ref_rejected": -1.1328125, + "logps_train/rejected": -2.3678486347198486, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12937258183956146, + "rewards_train/margins": -0.005868963897228241, + "rewards_train/rejected": -0.12350361794233322, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -153.22418212890625, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -137.9738311767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.172418117523193, + "rewards_train/margins": 0.47496509552001953, + "rewards_train/rejected": -4.647383213043213, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -283.247314453125, + "logps_train/ref_chosen": -224.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -234.45098876953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.924731731414795, + "rewards_train/margins": -0.07963275909423828, + "rewards_train/rejected": -5.845098972320557, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -104.83221435546875, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -158.5196533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.416778564453125, + "rewards_train/margins": 4.468743801116943, + "rewards_train/rejected": -4.051965236663818, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -19.09994125366211, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -20.43724822998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8787441253662109, + "rewards_train/margins": 0.23373067378997803, + "rewards_train/rejected": -1.112474799156189, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -249.2998504638672, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -274.08392333984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.429985046386719, + "rewards_train/margins": -0.22159290313720703, + "rewards_train/rejected": -10.208392143249512, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -100.14534759521484, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -96.658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8645347952842712, + "rewards_train/margins": 0.001285552978515625, + "rewards_train/rejected": -0.8658203482627869, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -110.86721801757812, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -101.6509780883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7867218255996704, + "rewards_train/margins": 1.1283760070800781, + "rewards_train/rejected": -1.9150978326797485, + "step": 896 + }, + { + "epoch": 0.25, + "logps_train/chosen": -14.434348106384277, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -2.7614450454711914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3262473344802856, + "rewards_train/margins": -1.318852829746902, + "rewards_train/rejected": -0.0073945047333836555, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -195.5243377685547, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -207.10287475585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.852434158325195, + "rewards_train/margins": 0.35785388946533203, + "rewards_train/rejected": -9.210288047790527, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -346.4266662597656, + "logps_train/ref_chosen": -236.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -296.4949645996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.0426664352417, + "rewards_train/margins": -0.49316978454589844, + "rewards_train/rejected": -10.5494966506958, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -106.42924499511719, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -164.291748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1929244995117188, + "rewards_train/margins": 2.5862503051757812, + "rewards_train/rejected": -4.7791748046875, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -67.88754272460938, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -82.9792251586914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.388754278421402, + "rewards_train/margins": 1.7591682374477386, + "rewards_train/rejected": -2.1479225158691406, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -131.0318603515625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -146.12738037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.70318603515625, + "rewards_train/margins": 0.7095522880554199, + "rewards_train/rejected": -4.41273832321167, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -64.69307708740234, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -99.92655944824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9693077206611633, + "rewards_train/margins": 3.3733484148979187, + "rewards_train/rejected": -4.342656135559082, + "step": 897 + }, + { + "epoch": 0.25, + "logps_train/chosen": -77.44538879394531, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -78.84701538085938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4445388913154602, + "rewards_train/margins": -0.05983734130859375, + "rewards_train/rejected": -0.38470155000686646, + "step": 897 + }, + { + "epoch": 0.25, + "learning_rate": 1.5212291533459354e-06, + "loss": 0.546, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -27.63585662841797, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -72.98031616210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40108567476272583, + "rewards_train/margins": 3.6219460368156433, + "rewards_train/rejected": -4.023031711578369, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -94.55320739746094, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -130.61337280273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3053207397460938, + "rewards_train/margins": 1.6060166358947754, + "rewards_train/rejected": -3.911337375640869, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -37.95545196533203, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -52.29171371459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.020545244216919, + "rewards_train/margins": 1.1836261749267578, + "rewards_train/rejected": -2.2041714191436768, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -21.400375366210938, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -27.950822830200195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08496246486902237, + "rewards_train/margins": 0.8050447478890419, + "rewards_train/rejected": -0.7200822830200195, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -128.40492248535156, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -257.2108459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0904922485351562, + "rewards_train/margins": 10.130592346191406, + "rewards_train/rejected": -11.221084594726562, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -81.10197448730469, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -150.1689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4851975440979004, + "rewards_train/margins": 0.7816970348358154, + "rewards_train/rejected": -3.266894578933716, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -166.47918701171875, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -209.88955688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.147918701171875, + "rewards_train/margins": 1.1410369873046875, + "rewards_train/rejected": -5.2889556884765625, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -136.8226318359375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -206.5377197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1822632551193237, + "rewards_train/margins": 6.721508622169495, + "rewards_train/rejected": -7.903771877288818, + "step": 898 + }, + { + "epoch": 0.25, + "logps_train/chosen": -229.3265838623047, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -99.27727508544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.1326584815979, + "rewards_train/margins": -4.704931020736694, + "rewards_train/rejected": -2.427727460861206, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -183.79812622070312, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -223.0, + "logps_train/rejected": -252.19996643066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5798126459121704, + "rewards_train/margins": 2.3401840925216675, + "rewards_train/rejected": -2.919996738433838, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -113.5262451171875, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -239.77027893066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6026245355606079, + "rewards_train/margins": 8.174403548240662, + "rewards_train/rejected": -8.77702808380127, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -115.45114135742188, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -143.6964111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3951141834259033, + "rewards_train/margins": 2.474526882171631, + "rewards_train/rejected": -3.869641065597534, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -153.49069213867188, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -145.03514099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4490692615509033, + "rewards_train/margins": 0.25444483757019043, + "rewards_train/rejected": -2.7035140991210938, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -5.7438249588012695, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -11.824049949645996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26500749588012695, + "rewards_train/margins": 0.01739749312400818, + "rewards_train/rejected": -0.28240498900413513, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -9.70901107788086, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -21.872879028320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3802761137485504, + "rewards_train/margins": 0.7570118606090546, + "rewards_train/rejected": -1.137287974357605, + "step": 899 + }, + { + "epoch": 0.25, + "logps_train/chosen": -91.9893569946289, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -116.74275970458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9989356994628906, + "rewards_train/margins": 0.42534029483795166, + "rewards_train/rejected": -1.4242759943008423, + "step": 899 + }, + { + "epoch": 0.25, + "learning_rate": 1.5189695737812151e-06, + "loss": 0.5334, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -75.54804992675781, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -127.94352722167969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0048049925826489925, + "rewards_train/margins": -0.0104522705078125, + "rewards_train/rejected": 0.0056472779251635075, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -112.37394714355469, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -250.20095825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3373947143554688, + "rewards_train/margins": 9.88270092010498, + "rewards_train/rejected": -12.22009563446045, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -153.3997344970703, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -215.33164978027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.739973545074463, + "rewards_train/margins": 2.893191337585449, + "rewards_train/rejected": -7.633164882659912, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -110.376708984375, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -231.49032592773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8876709342002869, + "rewards_train/margins": 7.461361467838287, + "rewards_train/rejected": -8.349032402038574, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -123.29505157470703, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -146.41700744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7795051336288452, + "rewards_train/margins": 5.062195897102356, + "rewards_train/rejected": -6.841701030731201, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -17.390899658203125, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -12.342759132385254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6078400015830994, + "rewards_train/margins": -0.40481407940387726, + "rewards_train/rejected": -0.2030259221792221, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.01097297668457, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -85.38348388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09890270233154297, + "rewards_train/margins": 2.812251091003418, + "rewards_train/rejected": -2.713348388671875, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -0.3081624507904053, + "logps_train/ref_chosen": -0.59375, + "logps_train/ref_rejected": -0.59375, + "logps_train/rejected": -0.30828848481178284, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028558755293488503, + "rewards_train/margins": 1.2602657079696655e-05, + "rewards_train/rejected": 0.028546152636408806, + "step": 900 + }, + { + "epoch": 0.25, + "logps_train/chosen": -7.05621337890625, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -21.35393524169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011871337890625, + "rewards_train/margins": 1.6828972101211548, + "rewards_train/rejected": -1.6947685480117798, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -41.03548812866211, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -73.6502685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6035488247871399, + "rewards_train/margins": 0.961478054523468, + "rewards_train/rejected": -1.565026879310608, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -93.86795806884766, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -297.3400573730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3867958784103394, + "rewards_train/margins": 10.647210240364075, + "rewards_train/rejected": -12.034006118774414, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -161.33578491210938, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -126.36857604980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0664215087890625, + "rewards_train/margins": 2.503279209136963, + "rewards_train/rejected": -2.4368577003479004, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -1.0438835620880127, + "logps_train/ref_chosen": -0.9453125, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -16.58810806274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009857106022536755, + "rewards_train/margins": 0.17395370919257402, + "rewards_train/rejected": -0.18381081521511078, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -27.586383819580078, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -48.857601165771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4023884534835815, + "rewards_train/margins": 0.6958717107772827, + "rewards_train/rejected": -2.0982601642608643, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -20.24201011657715, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -25.956762313842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6304510235786438, + "rewards_train/margins": 1.2589752078056335, + "rewards_train/rejected": -1.8894262313842773, + "step": 901 + }, + { + "epoch": 0.25, + "logps_train/chosen": -54.098052978515625, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -69.47346496582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0848053693771362, + "rewards_train/margins": -1.887458860874176, + "rewards_train/rejected": 0.8026534914970398, + "step": 901 + }, + { + "epoch": 0.25, + "learning_rate": 1.516706361979711e-06, + "loss": 0.3933, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -8.412331581115723, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -32.45627212524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5177956819534302, + "rewards_train/margins": 1.171581506729126, + "rewards_train/rejected": -1.6893771886825562, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -100.02926635742188, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -74.9795150756836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0029265880584717, + "rewards_train/margins": -1.4049750566482544, + "rewards_train/rejected": -0.5979515314102173, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -67.68152618408203, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -41.01026916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6931526064872742, + "rewards_train/margins": 1.9391244053840637, + "rewards_train/rejected": -2.632277011871338, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -173.90391540527344, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -209.36334228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.240391731262207, + "rewards_train/margins": 0.9959425926208496, + "rewards_train/rejected": -6.236334323883057, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -132.31329345703125, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -129.3055419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.231329321861267, + "rewards_train/margins": 0.6492248773574829, + "rewards_train/rejected": -1.88055419921875, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -136.357177734375, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -116.68638610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0357178449630737, + "rewards_train/margins": 2.3329209089279175, + "rewards_train/rejected": -3.368638753890991, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -13.819235801696777, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -12.784683227539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39307641983032227, + "rewards_train/margins": 1.2824822664260864, + "rewards_train/rejected": -0.8894058465957642, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -99.70745849609375, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -88.4093246459961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7207459211349487, + "rewards_train/margins": -0.17981338500976562, + "rewards_train/rejected": -1.540932536125183, + "step": 902 + }, + { + "epoch": 0.25, + "logps_train/chosen": -126.36039733886719, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -125.51741027832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9360398054122925, + "rewards_train/margins": 0.26570117473602295, + "rewards_train/rejected": -2.2017409801483154, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -11.267369270324707, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -7.497198581695557, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5017369389533997, + "rewards_train/margins": 0.0026704072952270508, + "rewards_train/rejected": -0.5044073462486267, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.879976272583008, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -27.873706817626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012997627258300781, + "rewards_train/margins": 0.8243730664253235, + "rewards_train/rejected": -0.8373706936836243, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -119.6832275390625, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -205.21926879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.118322730064392, + "rewards_train/margins": 5.703604340553284, + "rewards_train/rejected": -6.821927070617676, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -99.33648681640625, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -140.92648315429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.583648681640625, + "rewards_train/margins": 2.30899977684021, + "rewards_train/rejected": -3.892648458480835, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -4.015702724456787, + "logps_train/ref_chosen": -1.2421875, + "logps_train/ref_rejected": -1.3046875, + "logps_train/rejected": -2.3660922050476074, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2773515284061432, + "rewards_train/margins": -0.17121105641126633, + "rewards_train/rejected": -0.10614047199487686, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -106.79993438720703, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -135.08570861816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.52999347448349, + "rewards_train/margins": 0.9785774350166321, + "rewards_train/rejected": -1.508570909500122, + "step": 903 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.0421340465545654, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -1.171875, + "logps_train/rejected": -2.1329383850097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08702590316534042, + "rewards_train/margins": 0.009080439805984497, + "rewards_train/rejected": -0.09610634297132492, + "step": 903 + }, + { + "epoch": 0.25, + "learning_rate": 1.5144395337815063e-06, + "loss": 0.4625, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.385269165039062, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -38.70576095581055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8947769403457642, + "rewards_train/margins": 1.1382991075515747, + "rewards_train/rejected": -2.033076047897339, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -15.770380973815918, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -17.68843650817871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4582881033420563, + "rewards_train/margins": 0.6011805236339569, + "rewards_train/rejected": -1.0594686269760132, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -25.90353012084961, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -28.013336181640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7278530597686768, + "rewards_train/margins": -0.03276944160461426, + "rewards_train/rejected": -1.6950836181640625, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -60.25286865234375, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -107.68701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1252869367599487, + "rewards_train/margins": 1.943414330482483, + "rewards_train/rejected": -3.0687012672424316, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -247.07595825195312, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -274.975830078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.507596015930176, + "rewards_train/margins": 0.08998680114746094, + "rewards_train/rejected": -8.597582817077637, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -107.92253112792969, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -91.72817993164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4422531127929688, + "rewards_train/margins": -2.1694351136684418, + "rewards_train/rejected": -0.272817999124527, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -89.95504760742188, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -131.58987426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0455048084259033, + "rewards_train/margins": 0.8634827136993408, + "rewards_train/rejected": -3.908987522125244, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -97.73014831542969, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -44.48326873779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8730148673057556, + "rewards_train/margins": 1.4503119587898254, + "rewards_train/rejected": -2.323326826095581, + "step": 904 + }, + { + "epoch": 0.25, + "logps_train/chosen": -11.762205123901367, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -8.558066368103027, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5449705123901367, + "rewards_train/margins": -0.332913875579834, + "rewards_train/rejected": -0.21205663681030273, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -117.44998931884766, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -170.26321411132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6949989795684814, + "rewards_train/margins": 2.031322479248047, + "rewards_train/rejected": -3.7263214588165283, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.467254400253296, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -10.094131469726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09233706444501877, + "rewards_train/margins": 0.436125211417675, + "rewards_train/rejected": -0.34378814697265625, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -50.16608428955078, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -45.067256927490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.166608452796936, + "rewards_train/margins": 0.5401172637939453, + "rewards_train/rejected": -1.7067257165908813, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -181.54379272460938, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -233.4697723388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.554379463195801, + "rewards_train/margins": 1.3925976753234863, + "rewards_train/rejected": -5.946977138519287, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -100.79679107666016, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -208.28721618652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.079679250717163, + "rewards_train/margins": 4.749042272567749, + "rewards_train/rejected": -6.828721523284912, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -88.21258544921875, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -147.26571655273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.671258568763733, + "rewards_train/margins": 3.305312991142273, + "rewards_train/rejected": -4.976571559906006, + "step": 905 + }, + { + "epoch": 0.25, + "logps_train/chosen": -55.402252197265625, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -76.84950256347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3402252197265625, + "rewards_train/margins": 1.2947250604629517, + "rewards_train/rejected": -1.6349502801895142, + "step": 905 + }, + { + "epoch": 0.25, + "learning_rate": 1.512169105051995e-06, + "loss": 0.4691, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -142.83480834960938, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -209.88946533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.033481121063232, + "rewards_train/margins": 5.555465221405029, + "rewards_train/rejected": -9.588946342468262, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -121.29788208007812, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -193.10772705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0797882080078125, + "rewards_train/margins": 3.530984401702881, + "rewards_train/rejected": -5.610772609710693, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -6.833725452423096, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -4.971195220947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3271225392818451, + "rewards_train/margins": -0.08469051122665405, + "rewards_train/rejected": -0.24243202805519104, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -5.087909698486328, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -19.46053123474121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1962909698486328, + "rewards_train/margins": 1.2591371536254883, + "rewards_train/rejected": -1.455428123474121, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -60.39187240600586, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -37.931617736816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1391873359680176, + "rewards_train/margins": -0.2397754192352295, + "rewards_train/rejected": -2.899411916732788, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -8.370992660522461, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -19.014184951782227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36522427201271057, + "rewards_train/margins": 0.2986942231655121, + "rewards_train/rejected": -0.6639184951782227, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -126.33316040039062, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -136.75259399414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4833160638809204, + "rewards_train/margins": 1.091943383216858, + "rewards_train/rejected": -2.5752594470977783, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -12.487772941589355, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -15.747276306152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.330027312040329, + "rewards_train/margins": -0.14279967546463013, + "rewards_train/rejected": -0.18722763657569885, + "step": 906 + }, + { + "epoch": 0.25, + "logps_train/chosen": -9.62138557434082, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -37.5031623840332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6527635455131531, + "rewards_train/margins": 1.047552764415741, + "rewards_train/rejected": -1.700316309928894, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -141.5762481689453, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -239.88990783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.507624864578247, + "rewards_train/margins": 4.681366205215454, + "rewards_train/rejected": -6.188991069793701, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -83.28038787841797, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -114.38235473632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7280387878417969, + "rewards_train/margins": 0.3101966381072998, + "rewards_train/rejected": -2.0382354259490967, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -109.34492492675781, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -109.42381286621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8844925165176392, + "rewards_train/margins": 0.0078887939453125, + "rewards_train/rejected": -0.8923813104629517, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -60.85613250732422, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -19.90174102783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13561324775218964, + "rewards_train/margins": 0.7733108550310135, + "rewards_train/rejected": -0.9089241027832031, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -86.29621887207031, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -25.808822631835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1296218633651733, + "rewards_train/margins": 0.5387604236602783, + "rewards_train/rejected": -1.6683822870254517, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -14.853615760803223, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -29.313983917236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24161158502101898, + "rewards_train/margins": 0.8397867828607559, + "rewards_train/rejected": -1.081398367881775, + "step": 907 + }, + { + "epoch": 0.25, + "logps_train/chosen": -133.29360961914062, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -156.97042846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0293610095977783, + "rewards_train/margins": 3.1176817417144775, + "rewards_train/rejected": -6.147042751312256, + "step": 907 + }, + { + "epoch": 0.25, + "learning_rate": 1.509895091681771e-06, + "loss": 0.3898, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -100.53557586669922, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -100.45646667480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3035576343536377, + "rewards_train/margins": -0.007910966873168945, + "rewards_train/rejected": -2.2956466674804688, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -28.71811866760254, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -41.535491943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8718118667602539, + "rewards_train/margins": 0.8067373037338257, + "rewards_train/rejected": -1.6785491704940796, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -13.9853515625, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -12.841692924499512, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.579785168170929, + "rewards_train/margins": -0.65811587870121, + "rewards_train/rejected": 0.07833071053028107, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -290.9943542480469, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -280.2899169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.79943561553955, + "rewards_train/margins": 0.42955589294433594, + "rewards_train/rejected": -11.228991508483887, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -5.427370071411133, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -22.6088924407959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27242451906204224, + "rewards_train/margins": 1.0759647488594055, + "rewards_train/rejected": -1.3483892679214478, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -16.761127471923828, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -35.04176330566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9761127829551697, + "rewards_train/margins": 1.1718135476112366, + "rewards_train/rejected": -2.1479263305664062, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -176.7859344482422, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -171.65744018554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0785934925079346, + "rewards_train/margins": 1.1871507167816162, + "rewards_train/rejected": -4.265744209289551, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -123.57395935058594, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -210.07168579101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.407396078109741, + "rewards_train/margins": 4.799772500991821, + "rewards_train/rejected": -7.2071685791015625, + "step": 908 + }, + { + "epoch": 0.25, + "logps_train/chosen": -188.36724853515625, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -142.49752807617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.336724758148193, + "rewards_train/margins": -1.9869718551635742, + "rewards_train/rejected": -5.349752902984619, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -23.645124435424805, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -43.650611877441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20201244950294495, + "rewards_train/margins": 0.3380487263202667, + "rewards_train/rejected": -0.5400611758232117, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -215.7361602783203, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -203.97964477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.8736162185668945, + "rewards_train/margins": 0.4243483543395996, + "rewards_train/rejected": -6.297964572906494, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -60.524532318115234, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -33.54240798950195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4524532556533813, + "rewards_train/margins": -0.28571248054504395, + "rewards_train/rejected": -1.1667407751083374, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -11.261383056640625, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -32.683223724365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28863832354545593, + "rewards_train/margins": 0.47968408465385437, + "rewards_train/rejected": -0.7683224081993103, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.6601479053497314, + "logps_train/ref_chosen": -1.0, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -41.700592041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16601479053497314, + "rewards_train/margins": 3.3509193658828735, + "rewards_train/rejected": -3.5169341564178467, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -2.10528826713562, + "logps_train/ref_chosen": -1.4296875, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -7.674220085144043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06756007671356201, + "rewards_train/margins": 0.19986194372177124, + "rewards_train/rejected": -0.26742202043533325, + "step": 909 + }, + { + "epoch": 0.25, + "logps_train/chosen": -95.05923461914062, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -129.84426879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5559234619140625, + "rewards_train/margins": 1.02850341796875, + "rewards_train/rejected": -1.5844268798828125, + "step": 909 + }, + { + "epoch": 0.25, + "learning_rate": 1.5076175095865168e-06, + "loss": 0.5565, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -14.949545860290527, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -22.349685668945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6324545741081238, + "rewards_train/margins": 1.1837640404701233, + "rewards_train/rejected": -1.816218614578247, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -134.7553253173828, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -211.6090087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.625532627105713, + "rewards_train/margins": 6.635368824005127, + "rewards_train/rejected": -10.26090145111084, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -24.205867767333984, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -43.981590270996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3330868482589722, + "rewards_train/margins": 1.0650721788406372, + "rewards_train/rejected": -2.3981590270996094, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -92.09988403320312, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -97.00968170166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.059988498687744, + "rewards_train/margins": 1.1909797191619873, + "rewards_train/rejected": -3.2509682178497314, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -219.13380432128906, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -226.13150024414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.013380527496338, + "rewards_train/margins": 0.09976959228515625, + "rewards_train/rejected": -2.113150119781494, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -99.88600158691406, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -100.39773559570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8886001706123352, + "rewards_train/margins": 0.051173388957977295, + "rewards_train/rejected": -0.9397735595703125, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -181.83187866210938, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -209.25933837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.183187961578369, + "rewards_train/margins": 3.3927464485168457, + "rewards_train/rejected": -9.575934410095215, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -25.10301971435547, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -43.98228073120117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6603019833564758, + "rewards_train/margins": 0.9004260897636414, + "rewards_train/rejected": -1.5607280731201172, + "step": 910 + }, + { + "epoch": 0.25, + "logps_train/chosen": -146.78929138183594, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -207.61058044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1789292097091675, + "rewards_train/margins": 6.282128930091858, + "rewards_train/rejected": -7.461058139801025, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -28.138591766357422, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -52.118736267089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8388592004776001, + "rewards_train/margins": 0.8980144262313843, + "rewards_train/rejected": -1.7368736267089844, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -13.98570728302002, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -24.358497619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.511070728302002, + "rewards_train/margins": 0.5185290575027466, + "rewards_train/rejected": -1.0295997858047485, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -0.36466097831726074, + "logps_train/ref_chosen": -0.83203125, + "logps_train/ref_rejected": -0.83203125, + "logps_train/rejected": -0.37691530585289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.046737026423215866, + "rewards_train/margins": 0.0012254305183887482, + "rewards_train/rejected": 0.04551159590482712, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -138.54299926757812, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -166.323486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4542999267578125, + "rewards_train/margins": 1.2780487537384033, + "rewards_train/rejected": -3.732348680496216, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -89.51734924316406, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -179.17613220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4517349302768707, + "rewards_train/margins": 6.765878289937973, + "rewards_train/rejected": -7.217613220214844, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -112.3691177368164, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -211.73226928710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9369118213653564, + "rewards_train/margins": 8.436315298080444, + "rewards_train/rejected": -10.3732271194458, + "step": 911 + }, + { + "epoch": 0.25, + "logps_train/chosen": -133.0389404296875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -204.08694458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.753894031047821, + "rewards_train/margins": 4.654800713062286, + "rewards_train/rejected": -5.408694744110107, + "step": 911 + }, + { + "epoch": 0.25, + "learning_rate": 1.5053363747068924e-06, + "loss": 0.2673, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -66.09953308105469, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -73.56669616699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3349533081054688, + "rewards_train/margins": 0.8717162609100342, + "rewards_train/rejected": -2.206669569015503, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -229.11746215820312, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -240.2655029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.2117462158203125, + "rewards_train/margins": 0.3148040771484375, + "rewards_train/rejected": -7.52655029296875, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -172.12506103515625, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -79.44869995117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.762506008148193, + "rewards_train/margins": -2.005136013031006, + "rewards_train/rejected": -4.7573699951171875, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -112.5842056274414, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -179.25466918945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5084205865859985, + "rewards_train/margins": 6.517046332359314, + "rewards_train/rejected": -7.0254669189453125, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -23.044078826904297, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -21.570674896240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2294079065322876, + "rewards_train/margins": 0.25578463077545166, + "rewards_train/rejected": -1.4851925373077393, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -22.90559959411621, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -23.60565757751465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7718099355697632, + "rewards_train/margins": -0.29249417781829834, + "rewards_train/rejected": -1.4793157577514648, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -135.06988525390625, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -140.31578063964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.956988573074341, + "rewards_train/margins": 0.5245895385742188, + "rewards_train/rejected": -3.4815781116485596, + "step": 912 + }, + { + "epoch": 0.25, + "logps_train/chosen": -6.657832145690918, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -14.052071571350098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18765822052955627, + "rewards_train/margins": 0.32379892468452454, + "rewards_train/rejected": -0.5114571452140808, + "step": 912 + }, + { + "epoch": 0.26, + "logps_train/chosen": -164.52479553222656, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -53.052242279052734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.5524797439575195, + "rewards_train/margins": -5.47225546836853, + "rewards_train/rejected": -1.0802242755889893, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -78.82898712158203, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -189.36776733398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2171012908220291, + "rewards_train/margins": 4.35387821495533, + "rewards_train/rejected": -4.136776924133301, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.946526050567627, + "logps_train/ref_chosen": -0.96875, + "logps_train/ref_rejected": -2.375, + "logps_train/rejected": -4.818846702575684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1977776139974594, + "rewards_train/margins": 0.046607062220573425, + "rewards_train/rejected": -0.24438467621803284, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -13.111984252929688, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -25.209712982177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4049484431743622, + "rewards_train/margins": 0.7347728312015533, + "rewards_train/rejected": -1.1397212743759155, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -134.17051696777344, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -181.79295349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.617051601409912, + "rewards_train/margins": 1.562243938446045, + "rewards_train/rejected": -6.179295539855957, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -101.35055541992188, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -104.38457489013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1350555419921875, + "rewards_train/margins": 0.953402042388916, + "rewards_train/rejected": -3.0884575843811035, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -170.31455993652344, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -208.0, + "logps_train/rejected": -232.1546630859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4314560890197754, + "rewards_train/margins": -0.01598978042602539, + "rewards_train/rejected": -2.41546630859375, + "step": 913 + }, + { + "epoch": 0.26, + "logps_train/chosen": -62.269020080566406, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -57.4918212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5019020438194275, + "rewards_train/margins": 0.022280097007751465, + "rewards_train/rejected": -0.524182140827179, + "step": 913 + }, + { + "epoch": 0.26, + "learning_rate": 1.5030517030084232e-06, + "loss": 0.8703, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.996184349060059, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -1.125, + "logps_train/rejected": -14.708847045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2996184527873993, + "rewards_train/margins": 1.0587662756443024, + "rewards_train/rejected": -1.3583847284317017, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -141.1290283203125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -47.07356643676758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.012902736663818, + "rewards_train/margins": -2.0430461168289185, + "rewards_train/rejected": -1.9698566198349, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -138.5648193359375, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -135.6589813232422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6064820289611816, + "rewards_train/margins": -0.640583872795105, + "rewards_train/rejected": -1.9658981561660767, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -21.528718948364258, + "logps_train/ref_chosen": -29.125, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -21.043163299560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7596281170845032, + "rewards_train/margins": 2.092069447040558, + "rewards_train/rejected": -1.3324413299560547, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -263.9349060058594, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -70.65884399414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.8934907913208, + "rewards_train/margins": -9.627606391906738, + "rewards_train/rejected": -1.2658843994140625, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -148.89715576171875, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -185.91842651367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.489715576171875, + "rewards_train/margins": 5.152127265930176, + "rewards_train/rejected": -7.641842842102051, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -19.672367095947266, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -126.0633316040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0922367572784424, + "rewards_train/margins": 2.4140963554382324, + "rewards_train/rejected": -3.506333112716675, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -94.47193145751953, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -140.49917602539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39719316363334656, + "rewards_train/margins": 3.1527245342731476, + "rewards_train/rejected": -3.549917697906494, + "step": 914 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.313348770141602, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -2.125, + "logps_train/rejected": -4.96414852142334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.062415122985839844, + "rewards_train/margins": 0.3463299870491028, + "rewards_train/rejected": -0.28391486406326294, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -79.7181396484375, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -227.509033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.121814012527466, + "rewards_train/margins": 7.7290894985198975, + "rewards_train/rejected": -9.850903511047363, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -143.01419067382812, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -201.6612091064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5014190673828125, + "rewards_train/margins": 7.364702224731445, + "rewards_train/rejected": -8.866121292114258, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.527044296264648, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -1.640625, + "logps_train/rejected": -2.9081389904022217, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3058294355869293, + "rewards_train/margins": -0.17907802760601044, + "rewards_train/rejected": -0.12675140798091888, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.287324905395508, + "logps_train/ref_chosen": -2.796875, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -12.644865989685059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5490450263023376, + "rewards_train/margins": 0.06544160842895508, + "rewards_train/rejected": -0.6144866347312927, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -132.9203643798828, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -111.79386901855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1420364379882812, + "rewards_train/margins": 0.1373504400253296, + "rewards_train/rejected": -1.2793868780136108, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -10.054755210876465, + "logps_train/ref_chosen": -6.125, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -15.963179588317871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3929755389690399, + "rewards_train/margins": 0.8877173960208893, + "rewards_train/rejected": -1.2806929349899292, + "step": 915 + }, + { + "epoch": 0.26, + "logps_train/chosen": -36.566932678222656, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -35.00670623779297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.019193410873413, + "rewards_train/margins": -0.1435227394104004, + "rewards_train/rejected": -2.8756706714630127, + "step": 915 + }, + { + "epoch": 0.26, + "learning_rate": 1.5007635104813892e-06, + "loss": 1.071, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -16.562992095947266, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -30.0181884765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5875492095947266, + "rewards_train/margins": -0.3607303649187088, + "rewards_train/rejected": -0.22681884467601776, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -213.74606323242188, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -226.8535919189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.1746063232421875, + "rewards_train/margins": 2.5107526779174805, + "rewards_train/rejected": -8.685359001159668, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -129.7257080078125, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -129.48304748535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.172570824623108, + "rewards_train/margins": -0.02426600456237793, + "rewards_train/rejected": -1.14830482006073, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -3.742727279663086, + "logps_train/ref_chosen": -1.46875, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -4.71718692779541, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22739772498607635, + "rewards_train/margins": -0.33380403369665146, + "rewards_train/rejected": 0.1064063087105751, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -13.752115249633789, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -9.961816787719727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.108024001121521, + "rewards_train/margins": -0.3634048104286194, + "rewards_train/rejected": -0.7446191906929016, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -19.601333618164062, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -7.838160514831543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5663833618164062, + "rewards_train/margins": 0.009620189666748047, + "rewards_train/rejected": -0.5760035514831543, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -94.24027252197266, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -68.25237274169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5740272998809814, + "rewards_train/margins": 1.088710069656372, + "rewards_train/rejected": -3.6627373695373535, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -123.84095764160156, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -186.80796813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1340957880020142, + "rewards_train/margins": 5.446700930595398, + "rewards_train/rejected": -6.580796718597412, + "step": 916 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.279278755187988, + "logps_train/ref_chosen": -0.26171875, + "logps_train/ref_rejected": -0.26171875, + "logps_train/rejected": -8.284293174743652, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8017560243606567, + "rewards_train/margins": 0.0005014538764953613, + "rewards_train/rejected": -0.8022574782371521, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -87.5976333618164, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -75.3979263305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0097633600234985, + "rewards_train/margins": 0.280029296875, + "rewards_train/rejected": -1.2897926568984985, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -122.3216323852539, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -140.9174041748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6321632862091064, + "rewards_train/margins": 1.7595772743225098, + "rewards_train/rejected": -3.391740560531616, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -108.86946868896484, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -119.72505187988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.911946773529053, + "rewards_train/margins": 1.1105585098266602, + "rewards_train/rejected": -6.022505283355713, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -109.28231048583984, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -160.56980895996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.128231048583984, + "rewards_train/margins": 1.028749942779541, + "rewards_train/rejected": -5.156980991363525, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -41.10416793823242, + "logps_train/ref_chosen": -30.875, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -50.05509948730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0229167938232422, + "rewards_train/margins": 1.2075932025909424, + "rewards_train/rejected": -2.2305099964141846, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -86.13163757324219, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -91.00096893310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9131637811660767, + "rewards_train/margins": 0.6369332075119019, + "rewards_train/rejected": -2.5500969886779785, + "step": 917 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.5821611881256104, + "logps_train/ref_chosen": -0.54296875, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -11.263249397277832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20391924679279327, + "rewards_train/margins": -0.008844301104545593, + "rewards_train/rejected": -0.19507494568824768, + "step": 917 + }, + { + "epoch": 0.26, + "learning_rate": 1.4984718131407115e-06, + "loss": 0.4876, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -143.0166778564453, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -161.9738006591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0016679763793945, + "rewards_train/margins": 0.39571237564086914, + "rewards_train/rejected": -4.397380352020264, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -20.808452606201172, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -13.87286376953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.418345332145691, + "rewards_train/margins": -0.48730891942977905, + "rewards_train/rejected": -0.9310364127159119, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.872176170349121, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -17.94118309020996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9903426170349121, + "rewards_train/margins": 0.5709632635116577, + "rewards_train/rejected": -1.5613058805465698, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -139.40591430664062, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -138.08966064453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.05940857157111168, + "rewards_train/margins": -0.13162537291646004, + "rewards_train/rejected": 0.19103394448757172, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -16.16450309753418, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -28.495956420898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.249262809753418, + "rewards_train/margins": -0.1996670961380005, + "rewards_train/rejected": -1.0495957136154175, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -90.84880065917969, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -60.0972900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3348801136016846, + "rewards_train/margins": 1.1998488903045654, + "rewards_train/rejected": -2.53472900390625, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -14.91795539855957, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -11.032618522644043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09804554283618927, + "rewards_train/margins": 0.18646632134914398, + "rewards_train/rejected": -0.28451186418533325, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -199.2803192138672, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -172.01632690429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.82803201675415, + "rewards_train/margins": 0.27360057830810547, + "rewards_train/rejected": -5.101632595062256, + "step": 918 + }, + { + "epoch": 0.26, + "logps_train/chosen": -5.666737079620361, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -29.287656784057617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19323621690273285, + "rewards_train/margins": 1.548029437661171, + "rewards_train/rejected": -1.7412656545639038, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -104.28717041015625, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -88.26274108886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.828717052936554, + "rewards_train/margins": 1.0975571274757385, + "rewards_train/rejected": -1.9262741804122925, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -0.7929791212081909, + "logps_train/ref_chosen": -1.6953125, + "logps_train/ref_rejected": -1.578125, + "logps_train/rejected": -3.247340202331543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09023334085941315, + "rewards_train/margins": 0.2571548670530319, + "rewards_train/rejected": -0.16692152619361877, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -152.3230438232422, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -155.42620849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.482304334640503, + "rewards_train/margins": 0.5603165626525879, + "rewards_train/rejected": -3.042620897293091, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -4.700277805328369, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -9.167837142944336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10127778351306915, + "rewards_train/margins": -0.01574406772851944, + "rewards_train/rejected": -0.08553371578454971, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -12.883355140686035, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -12.51823616027832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.036664485931396484, + "rewards_train/margins": 0.03223810205236077, + "rewards_train/rejected": 0.004426383879035711, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -28.937471389770508, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -34.351165771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0937471389770508, + "rewards_train/margins": 1.028869390487671, + "rewards_train/rejected": -2.1226165294647217, + "step": 919 + }, + { + "epoch": 0.26, + "logps_train/chosen": -169.93429565429688, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -223.29312133789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.893429756164551, + "rewards_train/margins": -0.3641176223754883, + "rewards_train/rejected": -5.5293121337890625, + "step": 919 + }, + { + "epoch": 0.26, + "learning_rate": 1.4961766270258421e-06, + "loss": 0.5625, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -39.35425567626953, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -28.668912887573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9604255557060242, + "rewards_train/margins": 0.5502157807350159, + "rewards_train/rejected": -1.51064133644104, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -105.26661682128906, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -36.58676528930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.02666175365448, + "rewards_train/margins": 1.0320147275924683, + "rewards_train/rejected": -2.0586764812469482, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -144.2389678955078, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -166.32620239257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4238967895507812, + "rewards_train/margins": 2.758723735809326, + "rewards_train/rejected": -4.182620525360107, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -52.34471893310547, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -22.790557861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.059471894055604935, + "rewards_train/margins": 1.6102089397609234, + "rewards_train/rejected": -1.6696808338165283, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -106.82383728027344, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -189.91342163085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3823837339878082, + "rewards_train/margins": 3.208958476781845, + "rewards_train/rejected": -3.5913422107696533, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -75.71477508544922, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -59.25, + "logps_train/rejected": -86.7370376586914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1785224974155426, + "rewards_train/margins": 2.9272262156009674, + "rewards_train/rejected": -2.748703718185425, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -104.25507354736328, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -120.59974670410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1755073070526123, + "rewards_train/margins": -0.2655324935913086, + "rewards_train/rejected": -2.9099748134613037, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -19.66095733642578, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -18.233592987060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32859572768211365, + "rewards_train/margins": 0.1885136067867279, + "rewards_train/rejected": -0.5171093344688416, + "step": 920 + }, + { + "epoch": 0.26, + "logps_train/chosen": -75.8427734375, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -133.12750244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4842773377895355, + "rewards_train/margins": 0.9284729063510895, + "rewards_train/rejected": -1.412750244140625, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.192206382751465, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -13.4486722946167, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2879706621170044, + "rewards_train/margins": -0.6212283968925476, + "rewards_train/rejected": -0.6667422652244568, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -110.48219299316406, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -155.04544067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6482192873954773, + "rewards_train/margins": 2.056324779987335, + "rewards_train/rejected": -2.7045440673828125, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -6.341561317443848, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -16.353466033935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13415613770484924, + "rewards_train/margins": 0.6074405014514923, + "rewards_train/rejected": -0.7415966391563416, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -114.81573486328125, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -71.73211669921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.081573486328125, + "rewards_train/margins": -0.9083618074655533, + "rewards_train/rejected": -0.17321167886257172, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -72.427490234375, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -67.96121215820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.217748999595642, + "rewards_train/margins": -0.7216277718544006, + "rewards_train/rejected": -0.49612122774124146, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -142.55740356445312, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -194.6510009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6557403802871704, + "rewards_train/margins": 6.809359908103943, + "rewards_train/rejected": -7.465100288391113, + "step": 921 + }, + { + "epoch": 0.26, + "logps_train/chosen": -14.916400909423828, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -62.21971893310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1083599105477333, + "rewards_train/margins": 1.855331875383854, + "rewards_train/rejected": -1.7469719648361206, + "step": 921 + }, + { + "epoch": 0.26, + "learning_rate": 1.4938779682006502e-06, + "loss": 0.4363, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -10.162826538085938, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -30.308917999267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5037826895713806, + "rewards_train/margins": 0.827109158039093, + "rewards_train/rejected": -1.3308918476104736, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -13.218379974365234, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -15.118499755859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7405880093574524, + "rewards_train/margins": -0.0037380456924438477, + "rewards_train/rejected": -0.7368499636650085, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.257928848266602, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -17.628068923950195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48516789078712463, + "rewards_train/margins": 0.03388902544975281, + "rewards_train/rejected": -0.5190569162368774, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -4.776172637939453, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -4.714423179626465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07136726379394531, + "rewards_train/margins": 0.16101256012916565, + "rewards_train/rejected": -0.23237982392311096, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -134.3472137451172, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -160.81814575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06527862697839737, + "rewards_train/margins": 5.947093106806278, + "rewards_train/rejected": -5.881814479827881, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.4970717430114746, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -15.464520454406738, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02783217467367649, + "rewards_train/margins": 1.396744942292571, + "rewards_train/rejected": -1.4245771169662476, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -1.3055357933044434, + "logps_train/ref_chosen": -1.1328125, + "logps_train/ref_rejected": -3.71875, + "logps_train/rejected": -4.91520881652832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017272328957915306, + "rewards_train/margins": 0.10237355716526508, + "rewards_train/rejected": -0.11964588612318039, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -17.26669692993164, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -52.1126594543457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.020419716835022, + "rewards_train/margins": 0.44084620475769043, + "rewards_train/rejected": -1.4612659215927124, + "step": 922 + }, + { + "epoch": 0.26, + "logps_train/chosen": -43.327796936035156, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -33.48097610473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7702797651290894, + "rewards_train/margins": 0.015317916870117188, + "rewards_train/rejected": -1.7855976819992065, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -142.30453491210938, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -218.970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2804534435272217, + "rewards_train/margins": 5.316617250442505, + "rewards_train/rejected": -8.597070693969727, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -93.4081802368164, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -116.2874984741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3908180296421051, + "rewards_train/margins": 2.137931913137436, + "rewards_train/rejected": -2.528749942779541, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -0.09972873330116272, + "logps_train/ref_chosen": -0.1611328125, + "logps_train/ref_rejected": -0.1611328125, + "logps_train/rejected": -0.10090917348861694, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006140408106148243, + "rewards_train/margins": 0.00011804420500993729, + "rewards_train/rejected": 0.006022363901138306, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -47.230201721191406, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -54.5, + "logps_train/rejected": -68.75206756591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.001979827880859375, + "rewards_train/margins": 1.4271866083145142, + "rewards_train/rejected": -1.4252067804336548, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -141.65985107421875, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -231.6754150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.665985107421875, + "rewards_train/margins": 8.301556587219238, + "rewards_train/rejected": -8.967541694641113, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -40.097957611083984, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -78.9220962524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1847957372665405, + "rewards_train/margins": 0.9574140310287476, + "rewards_train/rejected": -2.142209768295288, + "step": 923 + }, + { + "epoch": 0.26, + "logps_train/chosen": -110.59780883789062, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -110.01515197753906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04021911695599556, + "rewards_train/margins": -0.05826568976044655, + "rewards_train/rejected": 0.09848480671644211, + "step": 923 + }, + { + "epoch": 0.26, + "learning_rate": 1.4915758527533106e-06, + "loss": 0.4046, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -5.764500617980957, + "logps_train/ref_chosen": -1.640625, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -29.21121597290039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41238757967948914, + "rewards_train/margins": 0.48373404145240784, + "rewards_train/rejected": -0.896121621131897, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -37.3044548034668, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -36.362037658691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3179454803466797, + "rewards_train/margins": 0.7057583332061768, + "rewards_train/rejected": -2.0237038135528564, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.650580883026123, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -24.785245895385742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011504411697387695, + "rewards_train/margins": 1.3462790250778198, + "rewards_train/rejected": -1.3347746133804321, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.72511100769043, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -18.037822723388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12876109778881073, + "rewards_train/margins": 0.9437711983919144, + "rewards_train/rejected": -1.072532296180725, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -5.207574844360352, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -12.523765563964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30044499039649963, + "rewards_train/margins": 0.3456815779209137, + "rewards_train/rejected": -0.6461265683174133, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -113.01363372802734, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -112.74560546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.85136342048645, + "rewards_train/margins": -0.026802778244018555, + "rewards_train/rejected": -2.8245606422424316, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -142.08096313476562, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -155.46258544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5080963373184204, + "rewards_train/margins": 1.8381623029708862, + "rewards_train/rejected": -2.3462586402893066, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -159.15695190429688, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -157.82769775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.4156951904296875, + "rewards_train/margins": 0.8670744895935059, + "rewards_train/rejected": -6.282769680023193, + "step": 924 + }, + { + "epoch": 0.26, + "logps_train/chosen": -173.0377960205078, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -130.64471435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.703779697418213, + "rewards_train/margins": 0.9106917381286621, + "rewards_train/rejected": -3.614471435546875, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -112.29637908935547, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -157.96029663085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3796379566192627, + "rewards_train/margins": 0.5163917541503906, + "rewards_train/rejected": -1.8960297107696533, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.983861923217773, + "logps_train/ref_chosen": -1.21875, + "logps_train/ref_rejected": -1.671875, + "logps_train/rejected": -9.404455184936523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7765111923217773, + "rewards_train/margins": -0.003253161907196045, + "rewards_train/rejected": -0.7732580304145813, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -133.93580627441406, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -192.38284301757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3935806453227997, + "rewards_train/margins": 1.4447037279605865, + "rewards_train/rejected": -1.8382843732833862, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -182.07516479492188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -223.03121948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.207516670227051, + "rewards_train/margins": 1.295605182647705, + "rewards_train/rejected": -6.503121852874756, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -7.513254642486572, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -15.972677230834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10757546871900558, + "rewards_train/margins": 0.8646922782063484, + "rewards_train/rejected": -0.972267746925354, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -19.95394515991211, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -21.038808822631836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27039453387260437, + "rewards_train/margins": 1.1991113722324371, + "rewards_train/rejected": -1.4695059061050415, + "step": 925 + }, + { + "epoch": 0.26, + "logps_train/chosen": -6.944197177886963, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -17.06684112548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3100447356700897, + "rewards_train/margins": 0.25288936495780945, + "rewards_train/rejected": -0.5629341006278992, + "step": 925 + }, + { + "epoch": 0.26, + "learning_rate": 1.4892702967961903e-06, + "loss": 0.3955, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -72.05866241455078, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -154.03807067871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5808662176132202, + "rewards_train/margins": 2.522940754890442, + "rewards_train/rejected": -4.103806972503662, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -9.25812816619873, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -2.59375, + "logps_train/rejected": -6.419342041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3226878345012665, + "rewards_train/margins": 0.0598713755607605, + "rewards_train/rejected": -0.382559210062027, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -26.27941131591797, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -2.09375, + "logps_train/rejected": -17.873777389526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2279411554336548, + "rewards_train/margins": 0.35006165504455566, + "rewards_train/rejected": -1.5780028104782104, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -14.196271896362305, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -16.234867095947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5133771896362305, + "rewards_train/margins": 0.7351095676422119, + "rewards_train/rejected": -1.2484867572784424, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.139839172363281, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -18.059343338012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5327339172363281, + "rewards_train/margins": 0.7075754404067993, + "rewards_train/rejected": -1.2403093576431274, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -3.943161964416504, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -43.403560638427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01818380318582058, + "rewards_train/margins": 2.23353991471231, + "rewards_train/rejected": -2.2153561115264893, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -78.64260864257812, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -98.33114624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06426086276769638, + "rewards_train/margins": 2.768853761255741, + "rewards_train/rejected": -2.8331146240234375, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -131.2033233642578, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -112.7237548828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6203323602676392, + "rewards_train/margins": -0.7479568719863892, + "rewards_train/rejected": -0.87237548828125, + "step": 926 + }, + { + "epoch": 0.26, + "logps_train/chosen": -76.72723388671875, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -161.49063110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4727233946323395, + "rewards_train/margins": 3.376339763402939, + "rewards_train/rejected": -3.8490631580352783, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -38.22502517700195, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -41.795955657958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34750252962112427, + "rewards_train/margins": 0.0570930540561676, + "rewards_train/rejected": -0.40459558367729187, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -57.15614318847656, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -45.485435485839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19061432778835297, + "rewards_train/margins": 2.220429316163063, + "rewards_train/rejected": -2.411043643951416, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -133.81103515625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -183.03604125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.131103515625, + "rewards_train/margins": 5.722500801086426, + "rewards_train/rejected": -6.853604316711426, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -123.56660461425781, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -155.26077270507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3566606044769287, + "rewards_train/margins": -1.030583381652832, + "rewards_train/rejected": -2.3260772228240967, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -6.772710800170898, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -14.556455612182617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08522891998291016, + "rewards_train/margins": 0.9783744812011719, + "rewards_train/rejected": -0.8931455612182617, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -116.50179290771484, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -191.914306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.650179386138916, + "rewards_train/margins": 3.891251564025879, + "rewards_train/rejected": -6.541430950164795, + "step": 927 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.4964029788970947, + "logps_train/ref_chosen": -1.390625, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -8.919916152954102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11057779937982559, + "rewards_train/margins": 0.05328882485628128, + "rewards_train/rejected": -0.16386662423610687, + "step": 927 + }, + { + "epoch": 0.26, + "learning_rate": 1.486961316465737e-06, + "loss": 0.4069, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -98.90933227539062, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -134.97592163085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0909332036972046, + "rewards_train/margins": 2.706658959388733, + "rewards_train/rejected": -3.7975921630859375, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -11.713212966918945, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -32.908851623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6119462847709656, + "rewards_train/margins": 0.49143892526626587, + "rewards_train/rejected": -1.1033852100372314, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -22.1195125579834, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -1.34375, + "logps_train/rejected": -15.209207534790039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4432013034820557, + "rewards_train/margins": -0.05665552616119385, + "rewards_train/rejected": -1.3865457773208618, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -13.493939399719238, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -16.00651741027832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9275189638137817, + "rewards_train/margins": -0.1331171989440918, + "rewards_train/rejected": -0.7944017648696899, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -125.72623443603516, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -124.23786926269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8726234436035156, + "rewards_train/margins": -0.14883649349212646, + "rewards_train/rejected": -1.7237869501113892, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -23.39954948425293, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -86.87952423095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2774549424648285, + "rewards_train/margins": 3.9104975759983063, + "rewards_train/rejected": -4.187952518463135, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -147.3151092529297, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -139.27413940429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3315110206604, + "rewards_train/margins": 1.345902919769287, + "rewards_train/rejected": -5.6774139404296875, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -47.721954345703125, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -44.93646240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5221954584121704, + "rewards_train/margins": 0.3089507818222046, + "rewards_train/rejected": -1.831146240234375, + "step": 928 + }, + { + "epoch": 0.26, + "logps_train/chosen": -163.44827270507812, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -201.41879272460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.84482741355896, + "rewards_train/margins": 4.8470518589019775, + "rewards_train/rejected": -7.6918792724609375, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -36.24589157104492, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -49.984275817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0370891094207764, + "rewards_train/margins": 1.0863385200500488, + "rewards_train/rejected": -3.123427629470825, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.7426676750183105, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -13.034992218017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.058641768991947174, + "rewards_train/margins": 0.5104824528098106, + "rewards_train/rejected": -0.5691242218017578, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -39.27373123168945, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -26.750537872314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7898731231689453, + "rewards_train/margins": 0.2320556640625, + "rewards_train/rejected": -2.0219287872314453, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -12.545942306518555, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -28.233213424682617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7733442187309265, + "rewards_train/margins": 0.5999771952629089, + "rewards_train/rejected": -1.3733214139938354, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -7.773076057434082, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -7.808253288269043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021057605743408203, + "rewards_train/margins": 0.003517722710967064, + "rewards_train/rejected": -0.024575328454375267, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -156.12493896484375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -199.98410034179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.812493920326233, + "rewards_train/margins": 2.2859162092208862, + "rewards_train/rejected": -4.098410129547119, + "step": 929 + }, + { + "epoch": 0.26, + "logps_train/chosen": -36.42218017578125, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -1.671875, + "logps_train/rejected": -29.416589736938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.492218017578125, + "rewards_train/margins": 0.2822535037994385, + "rewards_train/rejected": -2.7744715213775635, + "step": 929 + }, + { + "epoch": 0.26, + "learning_rate": 1.4846489279223652e-06, + "loss": 0.421, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -9.502551078796387, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -26.237342834472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19974489510059357, + "rewards_train/margins": 0.9484791904687881, + "rewards_train/rejected": -0.7487342953681946, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -64.9428482055664, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -65.3447036743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4442848265171051, + "rewards_train/margins": 0.04018554091453552, + "rewards_train/rejected": -0.4844703674316406, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -1.9786590337753296, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -0.2265625, + "logps_train/rejected": -2.4282639026641846, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10838409513235092, + "rewards_train/margins": 0.3285542353987694, + "rewards_train/rejected": -0.22017014026641846, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.567493438720703, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -17.260276794433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6004993319511414, + "rewards_train/margins": 0.13802838325500488, + "rewards_train/rejected": -0.7385277152061462, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -17.717267990112305, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -41.107688903808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3092268109321594, + "rewards_train/margins": 2.4577922224998474, + "rewards_train/rejected": -2.767019033432007, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -26.636859893798828, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -54.9074821472168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1011860370635986, + "rewards_train/margins": 0.7645622491836548, + "rewards_train/rejected": -1.8657482862472534, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -12.73802375793457, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -24.910911560058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21130238473415375, + "rewards_train/margins": 1.823538914322853, + "rewards_train/rejected": -2.034841299057007, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -205.21661376953125, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -114.06027221679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.221661329269409, + "rewards_train/margins": 0.5843658447265625, + "rewards_train/rejected": -3.8060271739959717, + "step": 930 + }, + { + "epoch": 0.26, + "logps_train/chosen": -28.300132751464844, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -43.719932556152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38001328706741333, + "rewards_train/margins": 1.4669800400733948, + "rewards_train/rejected": -1.846993327140808, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -180.47640991210938, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -232.78182983398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8476409912109375, + "rewards_train/margins": 5.030541896820068, + "rewards_train/rejected": -6.878182888031006, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.0792236328125, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -14.48600959777832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32667237520217896, + "rewards_train/margins": 0.5094286203384399, + "rewards_train/rejected": -0.8361009955406189, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -185.57400512695312, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -244.13926696777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.057400703430176, + "rewards_train/margins": 5.656526565551758, + "rewards_train/rejected": -9.713927268981934, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -4.292429447174072, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -1.40625, + "logps_train/rejected": -3.477137327194214, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2331492006778717, + "rewards_train/margins": -0.02606046199798584, + "rewards_train/rejected": -0.20708873867988586, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -130.87490844726562, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -123.54682159423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18749085068702698, + "rewards_train/margins": 1.4671913087368011, + "rewards_train/rejected": -1.6546821594238281, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -99.07958221435547, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -158.50320434570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1579582244157791, + "rewards_train/margins": 4.542362496256828, + "rewards_train/rejected": -4.700320720672607, + "step": 931 + }, + { + "epoch": 0.26, + "logps_train/chosen": -21.232845306396484, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -23.43976593017578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2920345067977905, + "rewards_train/margins": -0.37930792570114136, + "rewards_train/rejected": -0.9127265810966492, + "step": 931 + }, + { + "epoch": 0.26, + "learning_rate": 1.4823331473503416e-06, + "loss": 0.3587, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -99.73436737060547, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -93.29756164550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5234367251396179, + "rewards_train/margins": 0.10631942749023438, + "rewards_train/rejected": -0.6297561526298523, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -108.7774887084961, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -145.5475616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3277488946914673, + "rewards_train/margins": 1.977007269859314, + "rewards_train/rejected": -3.3047561645507812, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -34.607765197753906, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -33.621158599853516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.267026662826538, + "rewards_train/margins": -0.09866070747375488, + "rewards_train/rejected": -2.168365955352783, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -135.15447998046875, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -200.0728302001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0654480457305908, + "rewards_train/margins": 6.14183497428894, + "rewards_train/rejected": -7.207283020019531, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -10.919382095336914, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -10.423885345458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12318821251392365, + "rewards_train/margins": 0.4285753220319748, + "rewards_train/rejected": -0.5517635345458984, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -20.590511322021484, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -28.57425308227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6528011560440063, + "rewards_train/margins": 1.5952492952346802, + "rewards_train/rejected": -2.2480504512786865, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -153.07968139648438, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -191.1279754638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.707968235015869, + "rewards_train/margins": 4.454829216003418, + "rewards_train/rejected": -7.162797451019287, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -5.277958869934082, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -3.3807997703552246, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21998338401317596, + "rewards_train/margins": -0.08190339803695679, + "rewards_train/rejected": -0.13807998597621918, + "step": 932 + }, + { + "epoch": 0.26, + "logps_train/chosen": -16.016647338867188, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -39.67365264892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3454147279262543, + "rewards_train/margins": 2.4594506323337555, + "rewards_train/rejected": -2.8048653602600098, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -44.42243194580078, + "logps_train/ref_chosen": -31.625, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -31.656997680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2797431945800781, + "rewards_train/margins": 0.23595654964447021, + "rewards_train/rejected": -1.5156997442245483, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -152.22100830078125, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -267.3001403808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.922100782394409, + "rewards_train/margins": 6.107913255691528, + "rewards_train/rejected": -9.030014038085938, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -189.92779541015625, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -219.09097290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.542779445648193, + "rewards_train/margins": 1.5663180351257324, + "rewards_train/rejected": -8.109097480773926, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -194.23731994628906, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -179.53878784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.423732042312622, + "rewards_train/margins": 0.4301466941833496, + "rewards_train/rejected": -3.8538787364959717, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -109.64224243164062, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -96.11264038085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5357757806777954, + "rewards_train/margins": 0.5470398189499974, + "rewards_train/rejected": -0.011264038272202015, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -74.3995361328125, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -93.39619445800781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.614953637123108, + "rewards_train/margins": -0.9753341674804688, + "rewards_train/rejected": -0.6396194696426392, + "step": 933 + }, + { + "epoch": 0.26, + "logps_train/chosen": -195.90562438964844, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -246.5352325439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.490562438964844, + "rewards_train/margins": 0.5629606246948242, + "rewards_train/rejected": -9.053523063659668, + "step": 933 + }, + { + "epoch": 0.26, + "learning_rate": 1.4800139909576766e-06, + "loss": 0.4068, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -116.84324645996094, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -176.67018127441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1843246221542358, + "rewards_train/margins": 5.432693600654602, + "rewards_train/rejected": -6.617018222808838, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -6.013530731201172, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -37.71818542480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3560405671596527, + "rewards_train/margins": 1.615777999162674, + "rewards_train/rejected": -1.9718185663223267, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -233.2325439453125, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -176.42779541015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.123254299163818, + "rewards_train/margins": -1.3804748058319092, + "rewards_train/rejected": -3.742779493331909, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -7.238083839416504, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -16.5969295501709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19880838692188263, + "rewards_train/margins": 0.9108845442533493, + "rewards_train/rejected": -1.109692931175232, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -36.352264404296875, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -25.59827995300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3602265119552612, + "rewards_train/margins": 0.7964764833450317, + "rewards_train/rejected": -2.156702995300293, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -226.19163513183594, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -229.60826110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.31916332244873, + "rewards_train/margins": 1.14166259765625, + "rewards_train/rejected": -12.46082592010498, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -19.750228881835938, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -17.961349487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06252288818359375, + "rewards_train/margins": 0.40861207246780396, + "rewards_train/rejected": -0.4711349606513977, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -74.61174011230469, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -72.33506774902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5611740350723267, + "rewards_train/margins": 0.07233273983001709, + "rewards_train/rejected": -0.6335067749023438, + "step": 934 + }, + { + "epoch": 0.26, + "logps_train/chosen": -110.98332214355469, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -114.56622314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19833222031593323, + "rewards_train/margins": -0.04170989990234375, + "rewards_train/rejected": -0.15662232041358948, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -138.5169677734375, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -204.31558227539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.001697063446045, + "rewards_train/margins": 4.029860973358154, + "rewards_train/rejected": -8.0315580368042, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.10090446472168, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -9.543360710144043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5024095773696899, + "rewards_train/margins": 1.0786206722259521, + "rewards_train/rejected": -0.5762110948562622, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -205.6808624267578, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -119.10443115234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.368086338043213, + "rewards_train/margins": -4.15764307975769, + "rewards_train/rejected": -3.2104432582855225, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -81.69976043701172, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -232.765380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7199760675430298, + "rewards_train/margins": 6.956561923027039, + "rewards_train/rejected": -7.676537990570068, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -3.6621673107147217, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -25.127681732177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12871673703193665, + "rewards_train/margins": 0.8778015077114105, + "rewards_train/rejected": -1.0065182447433472, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -21.301084518432617, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -23.652650833129883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5363584756851196, + "rewards_train/margins": 0.20390665531158447, + "rewards_train/rejected": -1.740265130996704, + "step": 935 + }, + { + "epoch": 0.26, + "logps_train/chosen": -136.7491455078125, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -196.43988037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.674914598464966, + "rewards_train/margins": 3.819073438644409, + "rewards_train/rejected": -7.493988037109375, + "step": 935 + }, + { + "epoch": 0.26, + "learning_rate": 1.477691474976005e-06, + "loss": 0.6317, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -141.06678771972656, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -163.92559814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2066787481307983, + "rewards_train/margins": 3.5858813524246216, + "rewards_train/rejected": -4.79256010055542, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -191.08258056640625, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -204.13624572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.358258247375488, + "rewards_train/margins": 1.1553664207458496, + "rewards_train/rejected": -7.513624668121338, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -17.345596313476562, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -27.625, + "logps_train/rejected": -44.99189758300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.15330970287323, + "rewards_train/margins": 0.5833801031112671, + "rewards_train/rejected": -1.736689805984497, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -150.60726928710938, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -232.50765991210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5607268810272217, + "rewards_train/margins": 3.290039300918579, + "rewards_train/rejected": -5.850766181945801, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.956668853759766, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -29.21010398864746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2159794569015503, + "rewards_train/margins": -0.3449690341949463, + "rewards_train/rejected": -0.871010422706604, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -124.68586730957031, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -185.41708374023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0185867547988892, + "rewards_train/margins": 6.02312171459198, + "rewards_train/rejected": -7.041708469390869, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -150.00094604492188, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -140.93472290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5000946521759033, + "rewards_train/margins": 3.493377923965454, + "rewards_train/rejected": -4.993472576141357, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -123.48943328857422, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -202.7987518310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.248943328857422, + "rewards_train/margins": 5.580931663513184, + "rewards_train/rejected": -8.829874992370605, + "step": 936 + }, + { + "epoch": 0.26, + "logps_train/chosen": -212.1498565673828, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -218.92935180664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9149856567382812, + "rewards_train/margins": 0.07794952392578125, + "rewards_train/rejected": -3.9929351806640625, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -104.33932495117188, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -177.0804443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7839325666427612, + "rewards_train/margins": 6.274111866950989, + "rewards_train/rejected": -8.05804443359375, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -66.13897705078125, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -17.805797576904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23889771103858948, + "rewards_train/margins": 0.7104320824146271, + "rewards_train/rejected": -0.9493297934532166, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -21.94635772705078, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -37.799652099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5446357727050781, + "rewards_train/margins": 1.4728295803070068, + "rewards_train/rejected": -2.017465353012085, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -89.37652587890625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -100.42786407470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.187652587890625, + "rewards_train/margins": 3.305133819580078, + "rewards_train/rejected": -3.492786407470703, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -77.62075805664062, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -148.996826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1120758056640625, + "rewards_train/margins": 3.837606906890869, + "rewards_train/rejected": -3.9496827125549316, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -71.89617156982422, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -114.66181182861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1646171808242798, + "rewards_train/margins": 2.30156409740448, + "rewards_train/rejected": -3.4661812782287598, + "step": 937 + }, + { + "epoch": 0.26, + "logps_train/chosen": -25.313419342041016, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -31.770700454711914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0313419103622437, + "rewards_train/margins": 0.45822811126708984, + "rewards_train/rejected": -1.4895700216293335, + "step": 937 + }, + { + "epoch": 0.26, + "learning_rate": 1.4753656156604769e-06, + "loss": 0.2251, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -230.10801696777344, + "logps_train/ref_chosen": -194.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -235.05661010742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6108016967773438, + "rewards_train/margins": 0.694859504699707, + "rewards_train/rejected": -4.305661201477051, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -2.640561580657959, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -9.192172050476074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010931158438324928, + "rewards_train/margins": 0.2301610466092825, + "rewards_train/rejected": -0.24109220504760742, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -96.46815490722656, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -83.95175170898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.846815586090088, + "rewards_train/margins": -0.6016404628753662, + "rewards_train/rejected": -2.2451751232147217, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -144.39181518554688, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -181.9342498779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4391815662384033, + "rewards_train/margins": 4.554243326187134, + "rewards_train/rejected": -5.993424892425537, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -55.44501495361328, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -17.87425994873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8695014715194702, + "rewards_train/margins": -0.6227004528045654, + "rewards_train/rejected": -1.2468010187149048, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -104.24657440185547, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -128.44265747070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0246574878692627, + "rewards_train/margins": 2.3696084022521973, + "rewards_train/rejected": -3.39426589012146, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -12.496273040771484, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -26.058713912963867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0550960302352905, + "rewards_train/margins": 1.0164004564285278, + "rewards_train/rejected": -2.0714964866638184, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -145.23904418945312, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -192.4517364501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4739043712615967, + "rewards_train/margins": 3.721269369125366, + "rewards_train/rejected": -7.195173740386963, + "step": 938 + }, + { + "epoch": 0.26, + "logps_train/chosen": -12.106748580932617, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -26.309581756591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42942485213279724, + "rewards_train/margins": 1.2765333950519562, + "rewards_train/rejected": -1.7059582471847534, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -186.59768676757812, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -189.7362518310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.3097686767578125, + "rewards_train/margins": -0.03614330291748047, + "rewards_train/rejected": -7.273625373840332, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -33.887550354003906, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -19.189905166625977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5262550115585327, + "rewards_train/margins": -0.11663949489593506, + "rewards_train/rejected": -1.4096155166625977, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -214.29693603515625, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -170.8893585205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.129693508148193, + "rewards_train/margins": 2.009242534637451, + "rewards_train/rejected": -7.1389360427856445, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.582574844360352, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -21.310087203979492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12700748443603516, + "rewards_train/margins": 0.660251259803772, + "rewards_train/rejected": -0.7872587442398071, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -60.64787292480469, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -26.3497257232666, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2897872924804688, + "rewards_train/margins": -0.6173146963119507, + "rewards_train/rejected": -0.6724725961685181, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -31.64345359802246, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -0.99609375, + "logps_train/rejected": -20.72149085998535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.201845407485962, + "rewards_train/margins": 0.770694375038147, + "rewards_train/rejected": -1.9725397825241089, + "step": 939 + }, + { + "epoch": 0.26, + "logps_train/chosen": -70.15766143798828, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -164.32814025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21576614677906036, + "rewards_train/margins": 7.167047783732414, + "rewards_train/rejected": -7.382813930511475, + "step": 939 + }, + { + "epoch": 0.26, + "learning_rate": 1.4730364292896408e-06, + "loss": 0.4497, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -4.44500207901001, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -1.9375, + "logps_train/rejected": -4.395137310028076, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1616877168416977, + "rewards_train/margins": 0.08407601714134216, + "rewards_train/rejected": -0.24576373398303986, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -112.78983306884766, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -154.61709594726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3789833784103394, + "rewards_train/margins": 1.5327261686325073, + "rewards_train/rejected": -2.9117095470428467, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -75.75723266601562, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -170.642822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6757232546806335, + "rewards_train/margins": 3.3885591626167297, + "rewards_train/rejected": -4.064282417297363, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -206.504638671875, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -222.0, + "logps_train/rejected": -315.25616455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3504638671875, + "rewards_train/margins": 7.975152969360352, + "rewards_train/rejected": -9.325616836547852, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -8.426864624023438, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -23.023893356323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007313537877053022, + "rewards_train/margins": 1.8222028496675193, + "rewards_train/rejected": -1.8148893117904663, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -28.90279769897461, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -50.91648864746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6152797937393188, + "rewards_train/margins": 1.176369071006775, + "rewards_train/rejected": -1.7916488647460938, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -176.09201049804688, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -167.10348510742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.0092010498046875, + "rewards_train/margins": -0.29885244369506836, + "rewards_train/rejected": -6.710348606109619, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -42.77577209472656, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -35.9537353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10257720947265625, + "rewards_train/margins": 1.2052963972091675, + "rewards_train/rejected": -1.3078736066818237, + "step": 940 + }, + { + "epoch": 0.26, + "logps_train/chosen": -11.44477367401123, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -28.705530166625977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.889789879322052, + "rewards_train/margins": 1.2088882327079773, + "rewards_train/rejected": -2.0986781120300293, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -81.43804168701172, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -134.56956481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5438041687011719, + "rewards_train/margins": 3.063152313232422, + "rewards_train/rejected": -3.6069564819335938, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -39.793453216552734, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -30.567672729492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.066845417022705, + "rewards_train/margins": -0.3975781202316284, + "rewards_train/rejected": -1.6692672967910767, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -9.269037246704102, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -13.205253601074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5331537127494812, + "rewards_train/margins": 0.17487168312072754, + "rewards_train/rejected": -0.7080253958702087, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -264.83026123046875, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -200.11465454101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.783026695251465, + "rewards_train/margins": -1.9715614318847656, + "rewards_train/rejected": -8.8114652633667, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -6.574991703033447, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -4.9836201667785645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09499917179346085, + "rewards_train/margins": -0.06851215474307537, + "rewards_train/rejected": -0.026487017050385475, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -140.84457397460938, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -239.96188354492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2844574451446533, + "rewards_train/margins": 5.211730718612671, + "rewards_train/rejected": -8.496188163757324, + "step": 941 + }, + { + "epoch": 0.26, + "logps_train/chosen": -99.67400360107422, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -180.09658813476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7674005031585693, + "rewards_train/margins": 6.0422585010528564, + "rewards_train/rejected": -8.809659004211426, + "step": 941 + }, + { + "epoch": 0.26, + "learning_rate": 1.4707039321653328e-06, + "loss": 0.4426, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -135.6070098876953, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -188.03805541992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0107011795043945, + "rewards_train/margins": 2.9931044578552246, + "rewards_train/rejected": -7.003805637359619, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -204.0755615234375, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -210.81942749023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.507556438446045, + "rewards_train/margins": 0.6743865013122559, + "rewards_train/rejected": -5.181942939758301, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -31.748672485351562, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -26.91280174255371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7248672842979431, + "rewards_train/margins": 1.0601629614830017, + "rewards_train/rejected": -1.7850302457809448, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -12.252890586853027, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -29.24947166442871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6284140944480896, + "rewards_train/margins": 0.8090330958366394, + "rewards_train/rejected": -1.437447190284729, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -106.05467987060547, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -179.96963500976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7554681301116943, + "rewards_train/margins": 4.341495752334595, + "rewards_train/rejected": -8.096963882446289, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -85.2384033203125, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -173.24017333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.573840379714966, + "rewards_train/margins": 2.8001768589019775, + "rewards_train/rejected": -6.374017238616943, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -19.90024757385254, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -18.08929443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8275247812271118, + "rewards_train/margins": 0.400154709815979, + "rewards_train/rejected": -1.2276794910430908, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -16.337509155273438, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -30.68598175048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04000091552734375, + "rewards_train/margins": 2.692659854888916, + "rewards_train/rejected": -2.7326607704162598, + "step": 942 + }, + { + "epoch": 0.26, + "logps_train/chosen": -129.1162567138672, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -169.9710693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.611625671386719, + "rewards_train/margins": 0.9854812622070312, + "rewards_train/rejected": -5.59710693359375, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -186.81988525390625, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -154.26890563964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.58198881149292, + "rewards_train/margins": -2.4550981521606445, + "rewards_train/rejected": -5.126890659332275, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -6.204272270202637, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -33.75593566894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3376147449016571, + "rewards_train/margins": 1.112978845834732, + "rewards_train/rejected": -1.4505935907363892, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -94.1776123046875, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -103.35244750976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 1.0322388410568237, + "rewards_train/margins": 3.0174835920333862, + "rewards_train/rejected": -1.9852447509765625, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -10.946535110473633, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -24.299348831176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7227784991264343, + "rewards_train/margins": 0.2259063720703125, + "rewards_train/rejected": -0.9486848711967468, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -105.24956512451172, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -124.70088195800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3249565362930298, + "rewards_train/margins": 0.19513165950775146, + "rewards_train/rejected": -1.5200881958007812, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -172.5636444091797, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -238.54071044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.456364631652832, + "rewards_train/margins": 1.7977066040039062, + "rewards_train/rejected": -6.254071235656738, + "step": 943 + }, + { + "epoch": 0.26, + "logps_train/chosen": -137.14321899414062, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -148.710693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.814321994781494, + "rewards_train/margins": 1.8567476272583008, + "rewards_train/rejected": -6.671069622039795, + "step": 943 + }, + { + "epoch": 0.26, + "learning_rate": 1.4683681406125586e-06, + "loss": 0.403, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -83.89494323730469, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -196.50155639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8394943475723267, + "rewards_train/margins": 8.910661101341248, + "rewards_train/rejected": -9.750155448913574, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -93.58907318115234, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -83.69878387451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.058907508850098, + "rewards_train/margins": 0.5609707832336426, + "rewards_train/rejected": -4.61987829208374, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -278.6188049316406, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -203.86061096191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.8618803024292, + "rewards_train/margins": -3.625819206237793, + "rewards_train/rejected": -8.236061096191406, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.08202075958252, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -19.161033630371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0519521236419678, + "rewards_train/margins": -0.3233487606048584, + "rewards_train/rejected": -0.7286033630371094, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -154.68927001953125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -202.472900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5689270496368408, + "rewards_train/margins": 1.978363037109375, + "rewards_train/rejected": -3.547290086746216, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -139.39955139160156, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -149.40365600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8399551510810852, + "rewards_train/margins": 1.8004104495048523, + "rewards_train/rejected": -2.6403656005859375, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -41.36362838745117, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -28.99584197998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5863628387451172, + "rewards_train/margins": 0.3507213592529297, + "rewards_train/rejected": -1.9370841979980469, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -90.10806274414062, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -117.27816009521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8608062863349915, + "rewards_train/margins": 3.1170098185539246, + "rewards_train/rejected": -3.977816104888916, + "step": 944 + }, + { + "epoch": 0.26, + "logps_train/chosen": -132.8013458251953, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -166.3009033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.330134630203247, + "rewards_train/margins": 0.7999558448791504, + "rewards_train/rejected": -3.1300904750823975, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -85.1405029296875, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -152.74453735351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.564050316810608, + "rewards_train/margins": 2.5104035139083862, + "rewards_train/rejected": -4.074453830718994, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -90.43647766113281, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -196.018798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5936477184295654, + "rewards_train/margins": 6.208232164382935, + "rewards_train/rejected": -8.8018798828125, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -258.5743103027344, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -254.10205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.857431411743164, + "rewards_train/margins": 0.6527738571166992, + "rewards_train/rejected": -9.510205268859863, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -55.66126251220703, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -150.0444793701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.041126251220703125, + "rewards_train/margins": 2.063321828842163, + "rewards_train/rejected": -2.104448080062866, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -11.287973403930664, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -13.654279708862305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46629735827445984, + "rewards_train/margins": 0.13663062453269958, + "rewards_train/rejected": -0.6029279828071594, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -85.61276245117188, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -185.21383666992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6612762212753296, + "rewards_train/margins": 4.310107350349426, + "rewards_train/rejected": -5.971383571624756, + "step": 945 + }, + { + "epoch": 0.26, + "logps_train/chosen": -172.1669921875, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -211.33871459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7166993618011475, + "rewards_train/margins": 4.617172002792358, + "rewards_train/rejected": -7.333871364593506, + "step": 945 + }, + { + "epoch": 0.26, + "learning_rate": 1.4660290709793833e-06, + "loss": 0.4671, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -42.66576385498047, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -60.88754653930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5290764570236206, + "rewards_train/margins": 0.43467819690704346, + "rewards_train/rejected": -1.963754653930664, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -107.45263671875, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -156.0775604248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9452637434005737, + "rewards_train/margins": 3.46249258518219, + "rewards_train/rejected": -5.407756328582764, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -125.97373962402344, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -179.78358459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9473739862442017, + "rewards_train/margins": 2.930984377861023, + "rewards_train/rejected": -4.878358364105225, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -120.25831604003906, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -187.44874572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2258316278457642, + "rewards_train/margins": 2.2190428972244263, + "rewards_train/rejected": -3.4448745250701904, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -91.99856567382812, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -148.65853881835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24985657632350922, + "rewards_train/margins": 2.2659973055124283, + "rewards_train/rejected": -2.5158538818359375, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -108.8809814453125, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -159.01669311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8380982875823975, + "rewards_train/margins": 1.1635711193084717, + "rewards_train/rejected": -4.001669406890869, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -9.214977264404297, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -1.515625, + "logps_train/rejected": -11.021400451660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04725227504968643, + "rewards_train/margins": 0.997829832136631, + "rewards_train/rejected": -0.9505775570869446, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -150.65744018554688, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -163.5096435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.765744209289551, + "rewards_train/margins": 1.6352200508117676, + "rewards_train/rejected": -6.400964260101318, + "step": 946 + }, + { + "epoch": 0.26, + "logps_train/chosen": -28.061887741088867, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -17.82819366455078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8936887979507446, + "rewards_train/margins": -0.8514944314956665, + "rewards_train/rejected": -1.0421943664550781, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -13.487884521484375, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -16.825687408447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9050384759902954, + "rewards_train/margins": 0.394717812538147, + "rewards_train/rejected": -1.2997562885284424, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -154.14181518554688, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -159.00718688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.8641815185546875, + "rewards_train/margins": 0.3865370750427246, + "rewards_train/rejected": -5.250718593597412, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -15.90739917755127, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -9.060223579406738, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5344899296760559, + "rewards_train/margins": 0.01528245210647583, + "rewards_train/rejected": -0.5497723817825317, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -105.17292022705078, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -188.00262451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.767292022705078, + "rewards_train/margins": 3.232970714569092, + "rewards_train/rejected": -6.00026273727417, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -61.374114990234375, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -86.91801452636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1874115467071533, + "rewards_train/margins": 1.129390001296997, + "rewards_train/rejected": -2.3168015480041504, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -219.01580810546875, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -184.1610870361328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5015809535980225, + "rewards_train/margins": -1.8854721784591675, + "rewards_train/rejected": -1.616108775138855, + "step": 947 + }, + { + "epoch": 0.26, + "logps_train/chosen": -113.67237091064453, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -163.16635131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13276290893554688, + "rewards_train/margins": 4.149398326873779, + "rewards_train/rejected": -4.016635417938232, + "step": 947 + }, + { + "epoch": 0.26, + "learning_rate": 1.4636867396368145e-06, + "loss": 0.4272, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -14.37306022644043, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -14.692083358764648, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.759181022644043, + "rewards_train/margins": -0.30247268080711365, + "rewards_train/rejected": -0.4567083418369293, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -208.0208740234375, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -86.53502655029297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.402087688446045, + "rewards_train/margins": -3.8985849618911743, + "rewards_train/rejected": -1.5035027265548706, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -174.6669158935547, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -261.44921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.566691875457764, + "rewards_train/margins": 7.378230571746826, + "rewards_train/rejected": -11.94492244720459, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -40.88801956176758, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -98.98545837402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.43619805574417114, + "rewards_train/margins": 2.4347439408302307, + "rewards_train/rejected": -1.9985458850860596, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -16.812301635742188, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -2.09375, + "logps_train/rejected": -14.166576385498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4624801576137543, + "rewards_train/margins": 0.7448025047779083, + "rewards_train/rejected": -1.2072826623916626, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -136.6070556640625, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -210.50711059570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.91070556640625, + "rewards_train/margins": 6.340005397796631, + "rewards_train/rejected": -7.250710964202881, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -1.3373960256576538, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -7.1555705070495605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28188538551330566, + "rewards_train/margins": 0.23494243621826172, + "rewards_train/rejected": 0.046942949295043945, + "step": 948 + }, + { + "epoch": 0.26, + "logps_train/chosen": -96.69317626953125, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -155.08401489257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.769317626953125, + "rewards_train/margins": 7.189084053039551, + "rewards_train/rejected": -7.958401679992676, + "step": 948 + }, + { + "epoch": 0.27, + "logps_train/chosen": -101.71119689941406, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -138.50457763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.321119785308838, + "rewards_train/margins": 0.07933807373046875, + "rewards_train/rejected": -2.4004578590393066, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -0.31009146571159363, + "logps_train/ref_chosen": -0.29296875, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -14.125328063964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0017122715944424272, + "rewards_train/margins": 0.9358205705648288, + "rewards_train/rejected": -0.9375328421592712, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -136.99644470214844, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -158.75210571289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9496445655822754, + "rewards_train/margins": 3.075566291809082, + "rewards_train/rejected": -6.025210857391357, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -11.728240966796875, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -15.504571914672852, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7103241086006165, + "rewards_train/margins": 0.4541955590248108, + "rewards_train/rejected": -1.1645196676254272, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -50.019866943359375, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -140.69137573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10198669880628586, + "rewards_train/margins": 4.117150969803333, + "rewards_train/rejected": -4.219137668609619, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -88.56999969482422, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -146.20249938964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3569999933242798, + "rewards_train/margins": 4.263250231742859, + "rewards_train/rejected": -5.620250225067139, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -17.320011138916016, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -18.48157501220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09299888461828232, + "rewards_train/margins": 1.2192814573645592, + "rewards_train/rejected": -1.1262825727462769, + "step": 949 + }, + { + "epoch": 0.27, + "logps_train/chosen": -241.19082641601562, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -232.42575073242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.319083213806152, + "rewards_train/margins": -0.2765083312988281, + "rewards_train/rejected": -9.042574882507324, + "step": 949 + }, + { + "epoch": 0.27, + "learning_rate": 1.4613411629786878e-06, + "loss": 0.5303, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -34.818443298339844, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -21.906768798828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6068443655967712, + "rewards_train/margins": -0.14116749167442322, + "rewards_train/rejected": -0.465676873922348, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -139.04786682128906, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -181.03802490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.254786968231201, + "rewards_train/margins": 0.7490158081054688, + "rewards_train/rejected": -5.00380277633667, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -119.88264465332031, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -138.53773498535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1382644176483154, + "rewards_train/margins": 1.2655091285705566, + "rewards_train/rejected": -3.403773546218872, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -27.880107879638672, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -27.582639694213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9567607641220093, + "rewards_train/margins": 0.32025325298309326, + "rewards_train/rejected": -2.2770140171051025, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -241.55337524414062, + "logps_train/ref_chosen": -210.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -112.61660766601562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1553375720977783, + "rewards_train/margins": -1.243676781654358, + "rewards_train/rejected": -1.9116607904434204, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -158.28134155273438, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -171.81399536132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7281341552734375, + "rewards_train/margins": 2.95326566696167, + "rewards_train/rejected": -5.681399822235107, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -197.66485595703125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -243.80612182617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.366485595703125, + "rewards_train/margins": 4.314126968383789, + "rewards_train/rejected": -12.680612564086914, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -26.606552124023438, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -15.978147506713867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.151844784617424, + "rewards_train/margins": -0.16284047067165375, + "rewards_train/rejected": 0.31468525528907776, + "step": 950 + }, + { + "epoch": 0.27, + "logps_train/chosen": -64.96565246582031, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -32.1748046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.421565294265747, + "rewards_train/margins": -0.12908482551574707, + "rewards_train/rejected": -1.29248046875, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -62.04212951660156, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -54.74458694458008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1792129278182983, + "rewards_train/margins": 2.0577458143234253, + "rewards_train/rejected": -3.2369587421417236, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -92.5934066772461, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -174.40277099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.409340739250183, + "rewards_train/margins": 4.13093626499176, + "rewards_train/rejected": -5.540277004241943, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -129.438232421875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -98.709228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34382325410842896, + "rewards_train/margins": 0.727099597454071, + "rewards_train/rejected": -1.0709228515625, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -79.37344360351562, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -84.57119750976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06265564262866974, + "rewards_train/margins": -0.28022460639476776, + "rewards_train/rejected": 0.3428802490234375, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -135.92599487304688, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -151.81016540527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2925995588302612, + "rewards_train/margins": -0.21158301830291748, + "rewards_train/rejected": -1.0810165405273438, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -129.79794311523438, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -135.63833618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8797943592071533, + "rewards_train/margins": 3.5840394496917725, + "rewards_train/rejected": -6.463833808898926, + "step": 951 + }, + { + "epoch": 0.27, + "logps_train/chosen": -110.49483489990234, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -97.46849060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5994834899902344, + "rewards_train/margins": 0.14736557006835938, + "rewards_train/rejected": -0.7468490600585938, + "step": 951 + }, + { + "epoch": 0.27, + "learning_rate": 1.458992357421553e-06, + "loss": 0.4921, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -151.61813354492188, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -206.44869995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.911813497543335, + "rewards_train/margins": 4.633056402206421, + "rewards_train/rejected": -7.544869899749756, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -0.08236554265022278, + "logps_train/ref_chosen": -0.09619140625, + "logps_train/ref_rejected": -0.09619140625, + "logps_train/rejected": -0.08775173872709274, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0013825864298269153, + "rewards_train/margins": 0.000538619642611593, + "rewards_train/rejected": 0.0008439667872153223, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -116.00166320800781, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -146.84072875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.300166368484497, + "rewards_train/margins": 1.383906602859497, + "rewards_train/rejected": -2.684072971343994, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -22.144283294677734, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -76.37657928466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7644283175468445, + "rewards_train/margins": 1.8732295632362366, + "rewards_train/rejected": -2.637657880783081, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -161.86492919921875, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -54.85664749145508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.013507080264389515, + "rewards_train/margins": 1.399171900935471, + "rewards_train/rejected": -1.3856648206710815, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -186.68707275390625, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -220.8689727783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.668707370758057, + "rewards_train/margins": 1.5181899070739746, + "rewards_train/rejected": -6.186897277832031, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -104.3955078125, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -157.00779724121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9395508170127869, + "rewards_train/margins": 2.961228907108307, + "rewards_train/rejected": -3.9007797241210938, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -131.5367431640625, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -246.53900146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9036743640899658, + "rewards_train/margins": 7.450225591659546, + "rewards_train/rejected": -9.353899955749512, + "step": 952 + }, + { + "epoch": 0.27, + "logps_train/chosen": -28.802143096923828, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -27.249942779541016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.202089309692383, + "rewards_train/margins": -0.02084493637084961, + "rewards_train/rejected": -2.181244373321533, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.716450691223145, + "logps_train/ref_chosen": -1.8046875, + "logps_train/ref_rejected": -1.0546875, + "logps_train/rejected": -12.931706428527832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1911762952804565, + "rewards_train/margins": -0.0034743547439575195, + "rewards_train/rejected": -1.187701940536499, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -191.25247192382812, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -137.26751708984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.975247383117676, + "rewards_train/margins": -0.9484953880310059, + "rewards_train/rejected": -6.02675199508667, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -37.26810836791992, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -15.561646461486816, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4018108546733856, + "rewards_train/margins": 0.5168537795543671, + "rewards_train/rejected": -0.9186646342277527, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -30.75821304321289, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -42.622684478759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.663321316242218, + "rewards_train/margins": 1.2864471077919006, + "rewards_train/rejected": -1.9497684240341187, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -69.52263641357422, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -92.55003356933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10226364433765411, + "rewards_train/margins": 1.9527397602796555, + "rewards_train/rejected": -2.0550034046173096, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -7.539461612701416, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -17.802371978759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.40082117915153503, + "rewards_train/margins": -0.08933398127555847, + "rewards_train/rejected": -0.31148719787597656, + "step": 953 + }, + { + "epoch": 0.27, + "logps_train/chosen": -22.194984436035156, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -30.191814422607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7007484436035156, + "rewards_train/margins": 0.7059330940246582, + "rewards_train/rejected": -2.406681537628174, + "step": 953 + }, + { + "epoch": 0.27, + "learning_rate": 1.4566403394045585e-06, + "loss": 0.3872, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.361577987670898, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -17.884918212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3330328166484833, + "rewards_train/margins": 0.43045899271965027, + "rewards_train/rejected": -0.7634918093681335, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -7.313483238220215, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -53.252933502197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10634832829236984, + "rewards_train/margins": 2.906444974243641, + "rewards_train/rejected": -3.0127933025360107, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -28.067058563232422, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -38.27445602416992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.556705892086029, + "rewards_train/margins": 1.4582398533821106, + "rewards_train/rejected": -2.0149457454681396, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -19.569808959960938, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -19.50199317932129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3805191218852997, + "rewards_train/margins": 1.4182184636592865, + "rewards_train/rejected": -1.0376993417739868, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -246.19287109375, + "logps_train/ref_chosen": -203.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -236.2628173828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.319287300109863, + "rewards_train/margins": -0.19300556182861328, + "rewards_train/rejected": -4.12628173828125, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -118.37564086914062, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -131.00466918945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6875641345977783, + "rewards_train/margins": 2.5629026889801025, + "rewards_train/rejected": -5.250466823577881, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -23.14501953125, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -16.515024185180664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43950197100639343, + "rewards_train/margins": 0.30575045943260193, + "rewards_train/rejected": -0.7452524304389954, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -94.06980895996094, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -148.39004516601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5069808959960938, + "rewards_train/margins": 4.132023811340332, + "rewards_train/rejected": -5.639004707336426, + "step": 954 + }, + { + "epoch": 0.27, + "logps_train/chosen": -54.13634490966797, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -96.18766021728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23636551201343536, + "rewards_train/margins": 1.005131557583809, + "rewards_train/rejected": -0.7687660455703735, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -100.9075927734375, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -120.91325378417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05924072489142418, + "rewards_train/margins": 0.30056610330939293, + "rewards_train/rejected": -0.24132537841796875, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -170.50540161132812, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -219.7392120361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5505402088165283, + "rewards_train/margins": 6.523380994796753, + "rewards_train/rejected": -10.073921203613281, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -79.5625991821289, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -71.64252471923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2562599182128906, + "rewards_train/margins": 1.2079925537109375, + "rewards_train/rejected": -2.464252471923828, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -42.19976043701172, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -31.843021392822266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9074760675430298, + "rewards_train/margins": -0.9981738924980164, + "rewards_train/rejected": -0.9093021750450134, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -19.807571411132812, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -45.163631439208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9432571530342102, + "rewards_train/margins": 0.09810596704483032, + "rewards_train/rejected": -1.0413631200790405, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.414481163024902, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -14.962435722351074, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13519811630249023, + "rewards_train/margins": -0.37645454704761505, + "rewards_train/rejected": 0.24125643074512482, + "step": 955 + }, + { + "epoch": 0.27, + "logps_train/chosen": -161.15762329101562, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -183.20269775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.665762424468994, + "rewards_train/margins": 4.504507541656494, + "rewards_train/rejected": -8.170269966125488, + "step": 955 + }, + { + "epoch": 0.27, + "learning_rate": 1.4542851253893371e-06, + "loss": 0.4008, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -85.19039916992188, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -129.66030883789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9190399050712585, + "rewards_train/margins": 1.8469911217689514, + "rewards_train/rejected": -2.76603102684021, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.364619255065918, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -10.278044700622559, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0598994493484497, + "rewards_train/margins": -0.6789699792861938, + "rewards_train/rejected": -0.38092947006225586, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -102.84039306640625, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -122.03408813476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.734039306640625, + "rewards_train/margins": 3.1193695068359375, + "rewards_train/rejected": -4.8534088134765625, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -129.64601135253906, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -197.52890014648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.514601230621338, + "rewards_train/margins": 3.7382888793945312, + "rewards_train/rejected": -6.252890110015869, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -66.19068145751953, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -115.9569320678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.719068169593811, + "rewards_train/margins": 1.3766251802444458, + "rewards_train/rejected": -3.095693349838257, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -96.20852661132812, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -168.38812255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.77085280418396, + "rewards_train/margins": 5.417959451675415, + "rewards_train/rejected": -8.188812255859375, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -103.80342102050781, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -94.41424560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.080342173576355, + "rewards_train/margins": 1.8610824346542358, + "rewards_train/rejected": -2.941424608230591, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.69123363494873, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -15.182465553283691, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6066233515739441, + "rewards_train/margins": 0.358498215675354, + "rewards_train/rejected": -0.9651215672492981, + "step": 956 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.83768367767334, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -12.429722785949707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.059981632977724075, + "rewards_train/margins": 0.2217039205133915, + "rewards_train/rejected": -0.16172228753566742, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -82.80034637451172, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -88.89196014404297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6800346374511719, + "rewards_train/margins": -0.390838623046875, + "rewards_train/rejected": -1.2891960144042969, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -76.65097045898438, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -99.29508209228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16509704291820526, + "rewards_train/margins": 0.6644112020730972, + "rewards_train/rejected": -0.8295082449913025, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -27.94092559814453, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -33.57049560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7190925478935242, + "rewards_train/margins": 0.5004569888114929, + "rewards_train/rejected": -1.219549536705017, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.352986335754395, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -24.10389518737793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5227986574172974, + "rewards_train/margins": 0.06259089708328247, + "rewards_train/rejected": -0.5853895545005798, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -151.82449340820312, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -237.56326293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.582449436187744, + "rewards_train/margins": 3.473876953125, + "rewards_train/rejected": -6.056326389312744, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -137.838134765625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -193.96044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0338134765625, + "rewards_train/margins": 3.862231731414795, + "rewards_train/rejected": -5.896045207977295, + "step": 957 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.358978271484375, + "logps_train/ref_chosen": -1.2890625, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -17.69580078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3069915771484375, + "rewards_train/margins": -0.13116145133972168, + "rewards_train/rejected": -1.1758301258087158, + "step": 957 + }, + { + "epoch": 0.27, + "learning_rate": 1.4519267318598895e-06, + "loss": 0.3791, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -103.88558959960938, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -128.04141235351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7885589599609375, + "rewards_train/margins": 0.3655822277069092, + "rewards_train/rejected": -2.1541411876678467, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -136.72357177734375, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -113.21922302246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7223572731018066, + "rewards_train/margins": -0.10043501853942871, + "rewards_train/rejected": -3.621922254562378, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.664266586303711, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -24.807397842407227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4164266586303711, + "rewards_train/margins": 1.3455631732940674, + "rewards_train/rejected": -1.7619898319244385, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -93.73667907714844, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -134.59371948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6736679077148438, + "rewards_train/margins": 2.7857041358947754, + "rewards_train/rejected": -3.459372043609619, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.65636920928955, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -18.594139099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3968869149684906, + "rewards_train/margins": 0.46252700686454773, + "rewards_train/rejected": -0.8594139218330383, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -153.51760864257812, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -222.3969268798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5517609119415283, + "rewards_train/margins": 6.087931871414185, + "rewards_train/rejected": -7.639692783355713, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -112.53125762939453, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -186.67196655273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9531258344650269, + "rewards_train/margins": 4.664070725440979, + "rewards_train/rejected": -6.617196559906006, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -33.150081634521484, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -100.21988677978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2525081634521484, + "rewards_train/margins": 2.0694806575775146, + "rewards_train/rejected": -3.321988821029663, + "step": 958 + }, + { + "epoch": 0.27, + "logps_train/chosen": -28.756179809570312, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -28.82241439819336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31188201904296875, + "rewards_train/margins": 0.006623446941375732, + "rewards_train/rejected": 0.305258572101593, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -121.00846099853516, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -176.24459838867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8008460998535156, + "rewards_train/margins": 4.3736138343811035, + "rewards_train/rejected": -6.174459934234619, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -17.31608772277832, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -16.353811264038086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45660877227783203, + "rewards_train/margins": 0.11002236604690552, + "rewards_train/rejected": -0.5666311383247375, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -25.0843563079834, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -27.71808624267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3271856307983398, + "rewards_train/margins": 0.31337296962738037, + "rewards_train/rejected": -1.6405586004257202, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -6.785042762756348, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -13.90923023223877, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24100427329540253, + "rewards_train/margins": 0.5686687380075455, + "rewards_train/rejected": -0.809673011302948, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -37.131690979003906, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -42.79994201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3381690979003906, + "rewards_train/margins": 0.11682510375976562, + "rewards_train/rejected": -0.45499420166015625, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -3.7382829189300537, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -8.2007474899292, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2066407948732376, + "rewards_train/margins": 0.1478089541196823, + "rewards_train/rejected": -0.3544497489929199, + "step": 959 + }, + { + "epoch": 0.27, + "logps_train/chosen": -207.4556884765625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -186.74496459960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.245568752288818, + "rewards_train/margins": -1.3710722923278809, + "rewards_train/rejected": -5.8744964599609375, + "step": 959 + }, + { + "epoch": 0.27, + "learning_rate": 1.4495651753224704e-06, + "loss": 0.4611, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -71.66742706298828, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -152.77520751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11674270778894424, + "rewards_train/margins": 3.4607781395316124, + "rewards_train/rejected": -3.5775208473205566, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -10.376348495483398, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -10.447431564331055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0373651497066021, + "rewards_train/margins": 0.6664832942187786, + "rewards_train/rejected": -0.6291181445121765, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -22.527799606323242, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -39.127681732177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45277997851371765, + "rewards_train/margins": 2.034988194704056, + "rewards_train/rejected": -2.4877681732177734, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -9.055670738220215, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -31.668996810913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1131829246878624, + "rewards_train/margins": 1.0800826177001, + "rewards_train/rejected": -0.9668996930122375, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -25.5734920501709, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -29.73232650756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5635992288589478, + "rewards_train/margins": 0.059633493423461914, + "rewards_train/rejected": -1.6232327222824097, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -2.645578622817993, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -8.179533958435059, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04325463995337486, + "rewards_train/margins": -0.013791963458061218, + "rewards_train/rejected": 0.05704660341143608, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -0.5714545249938965, + "logps_train/ref_chosen": -0.47265625, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -6.502748489379883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009879827499389648, + "rewards_train/margins": 0.04977002367377281, + "rewards_train/rejected": -0.05964985117316246, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.960947036743164, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -19.289997100830078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6617197394371033, + "rewards_train/margins": -0.1327199935913086, + "rewards_train/rejected": -0.5289997458457947, + "step": 960 + }, + { + "epoch": 0.27, + "logps_train/chosen": -16.209169387817383, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -17.95208740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5084169507026672, + "rewards_train/margins": 0.9571042656898499, + "rewards_train/rejected": -1.465521216392517, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -146.2857208251953, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -224.41226196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8785721063613892, + "rewards_train/margins": 1.3626540899276733, + "rewards_train/rejected": -3.2412261962890625, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -156.15673828125, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -162.70880126953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9656739234924316, + "rewards_train/margins": -0.444793701171875, + "rewards_train/rejected": -3.5208802223205566, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -83.56407928466797, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -121.5318374633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.793592095375061, + "rewards_train/margins": 3.6467758417129517, + "rewards_train/rejected": -2.8531837463378906, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -32.605079650878906, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -40.62134552001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9355079531669617, + "rewards_train/margins": 1.289126694202423, + "rewards_train/rejected": -2.2246346473693848, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -99.33949279785156, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -216.2708740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9339492321014404, + "rewards_train/margins": 6.043138742446899, + "rewards_train/rejected": -9.97708797454834, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.080819129943848, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -14.22264575958252, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9612069129943848, + "rewards_train/margins": 0.07980763912200928, + "rewards_train/rejected": -1.041014552116394, + "step": 961 + }, + { + "epoch": 0.27, + "logps_train/chosen": -11.418974876403809, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -9.71843433380127, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9075224995613098, + "rewards_train/margins": -0.19974154233932495, + "rewards_train/rejected": -0.7077809572219849, + "step": 961 + }, + { + "epoch": 0.27, + "learning_rate": 1.4472004723054711e-06, + "loss": 0.4295, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -142.02682495117188, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -171.85177612304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.602682590484619, + "rewards_train/margins": 3.4324951171875, + "rewards_train/rejected": -7.035177707672119, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -23.15198516845703, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -15.442251205444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.89644855260849, + "rewards_train/margins": 0.035276591777801514, + "rewards_train/rejected": -0.9317251443862915, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -168.24362182617188, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -142.48675537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.624362468719482, + "rewards_train/margins": 2.224313259124756, + "rewards_train/rejected": -6.848675727844238, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -94.05938720703125, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -168.7462158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.605938732624054, + "rewards_train/margins": 2.4686829447746277, + "rewards_train/rejected": -3.0746216773986816, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -205.86090087890625, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -196.2220916748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.28609037399292, + "rewards_train/margins": 2.586118698120117, + "rewards_train/rejected": -7.872209072113037, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -35.71970748901367, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -62.579227447509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19697074592113495, + "rewards_train/margins": 2.385951951146126, + "rewards_train/rejected": -2.5829226970672607, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -64.78218078613281, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -19.22393226623535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.753218173980713, + "rewards_train/margins": -1.343324899673462, + "rewards_train/rejected": -1.409893274307251, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.232417106628418, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -29.258480072021484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3919917047023773, + "rewards_train/margins": 0.9088563024997711, + "rewards_train/rejected": -1.3008480072021484, + "step": 962 + }, + { + "epoch": 0.27, + "logps_train/chosen": -190.0026092529297, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -231.2906494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8002610206604004, + "rewards_train/margins": 4.1288042068481445, + "rewards_train/rejected": -7.929065227508545, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -69.013916015625, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -128.32846069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2513916492462158, + "rewards_train/margins": 3.1314547061920166, + "rewards_train/rejected": -4.382846355438232, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -23.041622161865234, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -50.8392448425293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6541622877120972, + "rewards_train/margins": 2.0860122442245483, + "rewards_train/rejected": -3.7401745319366455, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -5.39152717590332, + "logps_train/ref_chosen": -1.6953125, + "logps_train/ref_rejected": -2.421875, + "logps_train/rejected": -5.353696346282959, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.36962148547172546, + "rewards_train/margins": -0.07643935084342957, + "rewards_train/rejected": -0.2931821346282959, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -121.69668579101562, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -157.9231719970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2196686267852783, + "rewards_train/margins": 1.022648572921753, + "rewards_train/rejected": -3.2423171997070312, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -211.96044921875, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -193.1279754638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.596045017242432, + "rewards_train/margins": 1.916752815246582, + "rewards_train/rejected": -6.512797832489014, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -118.82373046875, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -199.50965881347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.632373034954071, + "rewards_train/margins": 6.918592751026154, + "rewards_train/rejected": -7.550965785980225, + "step": 963 + }, + { + "epoch": 0.27, + "logps_train/chosen": -137.87985229492188, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -136.5247802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9879852533340454, + "rewards_train/margins": 0.16449284553527832, + "rewards_train/rejected": -1.1524780988693237, + "step": 963 + }, + { + "epoch": 0.27, + "learning_rate": 1.4448326393593061e-06, + "loss": 0.309, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.43075180053711, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -19.334083557128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31807518005371094, + "rewards_train/margins": 0.5153331756591797, + "rewards_train/rejected": -0.8334083557128906, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.646163940429688, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -17.902193069458008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6458663940429688, + "rewards_train/margins": 0.23810291290283203, + "rewards_train/rejected": -0.8839693069458008, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -94.0765380859375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -33.3484992980957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.257653832435608, + "rewards_train/margins": 0.33969616889953613, + "rewards_train/rejected": -1.597350001335144, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -41.341060638427734, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -29.5894775390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8903560638427734, + "rewards_train/margins": -0.6251583099365234, + "rewards_train/rejected": -2.26519775390625, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -146.97299194335938, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -130.83612060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.647299289703369, + "rewards_train/margins": 0.03631281852722168, + "rewards_train/rejected": -2.683612108230591, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -144.2677001953125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -177.87567138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.176770210266113, + "rewards_train/margins": 0.8607969284057617, + "rewards_train/rejected": -5.037567138671875, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -19.98699188232422, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -20.222240447998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1674492359161377, + "rewards_train/margins": 0.364149808883667, + "rewards_train/rejected": -1.5315990447998047, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -170.1247100830078, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -234.5970458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.162471294403076, + "rewards_train/margins": 1.1972332000732422, + "rewards_train/rejected": -6.359704494476318, + "step": 964 + }, + { + "epoch": 0.27, + "logps_train/chosen": -20.256580352783203, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -23.035585403442383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1631580591201782, + "rewards_train/margins": 0.743525505065918, + "rewards_train/rejected": -1.9066835641860962, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -60.9869384765625, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -58.06716537475586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02369384840130806, + "rewards_train/margins": 2.233022641390562, + "rewards_train/rejected": -2.25671648979187, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -106.98262023925781, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -125.66119384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7482620477676392, + "rewards_train/margins": 2.2178574800491333, + "rewards_train/rejected": -3.9661195278167725, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -81.70513153076172, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -101.04695892333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8205131888389587, + "rewards_train/margins": 0.9341827034950256, + "rewards_train/rejected": -1.7546958923339844, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -121.42762756347656, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -196.05731201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9427627921104431, + "rewards_train/margins": 2.3629685044288635, + "rewards_train/rejected": -3.3057312965393066, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.903898239135742, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -14.047117233276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2560148239135742, + "rewards_train/margins": 0.07369691133499146, + "rewards_train/rejected": -0.3297117352485657, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -167.39495849609375, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -211.45468139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.239495754241943, + "rewards_train/margins": 0.6059722900390625, + "rewards_train/rejected": -5.845468044281006, + "step": 965 + }, + { + "epoch": 0.27, + "logps_train/chosen": -10.187058448791504, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -0.75, + "logps_train/rejected": -2.2725038528442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4093308448791504, + "rewards_train/margins": -0.25708045065402985, + "rewards_train/rejected": -0.15225039422512054, + "step": 965 + }, + { + "epoch": 0.27, + "learning_rate": 1.442461693056295e-06, + "loss": 0.4627, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -4.463702201843262, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -7.4324951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30496397614479065, + "rewards_train/margins": 0.04297304153442383, + "rewards_train/rejected": -0.3479370176792145, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -4.162717342376709, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -1.328125, + "logps_train/rejected": -2.0432140827178955, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13997827470302582, + "rewards_train/margins": 0.21148718148469925, + "rewards_train/rejected": -0.07150890678167343, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -43.08946990966797, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -40.873512268066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6589469909667969, + "rewards_train/margins": 1.3659043312072754, + "rewards_train/rejected": -2.0248513221740723, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -4.202646732330322, + "logps_train/ref_chosen": -0.515625, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -20.41727638244629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3687021732330322, + "rewards_train/margins": 0.3855254650115967, + "rewards_train/rejected": -0.7542276382446289, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -18.162139892578125, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -23.0509090423584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8599640130996704, + "rewards_train/margins": 0.23887693881988525, + "rewards_train/rejected": -1.0988409519195557, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -103.59523010253906, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -214.81948852539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2595230042934418, + "rewards_train/margins": 8.022426038980484, + "rewards_train/rejected": -8.281949043273926, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -87.33082580566406, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -143.23915100097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5830826163291931, + "rewards_train/margins": 0.9408325552940369, + "rewards_train/rejected": -1.52391517162323, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.71437931060791, + "logps_train/ref_chosen": -1.7890625, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -9.02448844909668, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2925317287445068, + "rewards_train/margins": -0.5932078957557678, + "rewards_train/rejected": -0.699323832988739, + "step": 966 + }, + { + "epoch": 0.27, + "logps_train/chosen": -110.79891204833984, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -104.38264465332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0798912048339844, + "rewards_train/margins": 0.10837328433990479, + "rewards_train/rejected": -1.1882644891738892, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -96.01850128173828, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -119.6898422241211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.451850175857544, + "rewards_train/margins": 1.6671340465545654, + "rewards_train/rejected": -3.1189842224121094, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -83.94840240478516, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -177.59310913085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2948402166366577, + "rewards_train/margins": 4.26447069644928, + "rewards_train/rejected": -5.5593109130859375, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -70.54296112060547, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -169.83326721191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7292961478233337, + "rewards_train/margins": 8.104031145572662, + "rewards_train/rejected": -8.833327293395996, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.9727783203125, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -23.66314697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7035278677940369, + "rewards_train/margins": 0.9409118294715881, + "rewards_train/rejected": -1.644439697265625, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -158.53787231445312, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -198.6674041748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8537873029708862, + "rewards_train/margins": 7.612953305244446, + "rewards_train/rejected": -9.466740608215332, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -141.59207153320312, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -183.27532958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.20920729637146, + "rewards_train/margins": 4.918325662612915, + "rewards_train/rejected": -7.127532958984375, + "step": 967 + }, + { + "epoch": 0.27, + "logps_train/chosen": -165.12950134277344, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -156.6622314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0129501819610596, + "rewards_train/margins": 2.3032729625701904, + "rewards_train/rejected": -4.31622314453125, + "step": 967 + }, + { + "epoch": 0.27, + "learning_rate": 1.440087649990549e-06, + "loss": 0.3259, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -118.894287109375, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -104.89399719238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.189428687095642, + "rewards_train/margins": 0.8999711275100708, + "rewards_train/rejected": -2.089399814605713, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -6.0312089920043945, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -12.560261726379395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009370899759232998, + "rewards_train/margins": 0.37165527883917093, + "rewards_train/rejected": -0.38102617859840393, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -94.31657409667969, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -163.90237426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1816574335098267, + "rewards_train/margins": 3.958579897880554, + "rewards_train/rejected": -5.140237331390381, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -0.47618091106414795, + "logps_train/ref_chosen": -0.310546875, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -11.350992202758789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.016563404351472855, + "rewards_train/margins": 0.709160815924406, + "rewards_train/rejected": -0.7257242202758789, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -1.1431095600128174, + "logps_train/ref_chosen": -1.109375, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -8.276493072509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0033734559547156096, + "rewards_train/margins": 0.4024008691776544, + "rewards_train/rejected": -0.40577432513237, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -30.19818687438965, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -29.85999870300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2573187351226807, + "rewards_train/margins": 0.18805623054504395, + "rewards_train/rejected": -2.4453749656677246, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -130.26760864257812, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -204.1331787109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.2267608642578125, + "rewards_train/margins": 1.8865571022033691, + "rewards_train/rejected": -7.113317966461182, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -66.04366302490234, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -75.31224822998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.49563369154930115, + "rewards_train/margins": 3.376858562231064, + "rewards_train/rejected": -2.8812248706817627, + "step": 968 + }, + { + "epoch": 0.27, + "logps_train/chosen": -43.509037017822266, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -115.45690155029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35090371966362, + "rewards_train/margins": 2.144786387681961, + "rewards_train/rejected": -2.495690107345581, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -126.0195541381836, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -170.11508178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8019554615020752, + "rewards_train/margins": 1.7095527648925781, + "rewards_train/rejected": -3.5115082263946533, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -164.439208984375, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -221.3536834716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5439209938049316, + "rewards_train/margins": 0.19144749641418457, + "rewards_train/rejected": -3.735368490219116, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -19.641559600830078, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -20.735389709472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3766559660434723, + "rewards_train/margins": 1.1593830287456512, + "rewards_train/rejected": -1.5360389947891235, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -129.71202087402344, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -187.2620391845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.871202230453491, + "rewards_train/margins": 4.005001783370972, + "rewards_train/rejected": -7.876204013824463, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -9.112577438354492, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -20.92266845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02624225616455078, + "rewards_train/margins": 0.36850911378860474, + "rewards_train/rejected": -0.34226685762405396, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -5.601370811462402, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -27.966217041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11326207965612411, + "rewards_train/margins": 0.7708596363663673, + "rewards_train/rejected": -0.8841217160224915, + "step": 969 + }, + { + "epoch": 0.27, + "logps_train/chosen": -30.230186462402344, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -62.56708526611328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9855186343193054, + "rewards_train/margins": -0.15381008386611938, + "rewards_train/rejected": -0.831708550453186, + "step": 969 + }, + { + "epoch": 0.27, + "learning_rate": 1.4377105267778518e-06, + "loss": 0.3394, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -96.06341552734375, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -170.25640869140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.206341505050659, + "rewards_train/margins": 5.669299364089966, + "rewards_train/rejected": -7.875640869140625, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -119.24185943603516, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -188.445068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4741859436035156, + "rewards_train/margins": 3.2703208923339844, + "rewards_train/rejected": -4.7445068359375, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -16.35508155822754, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -38.780517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48550817370414734, + "rewards_train/margins": 0.3175435960292816, + "rewards_train/rejected": -0.803051769733429, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -191.37860107421875, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -195.26963806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9378602504730225, + "rewards_train/margins": 2.5891034603118896, + "rewards_train/rejected": -6.526963710784912, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -10.994678497314453, + "logps_train/ref_chosen": -1.1484375, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -28.710098266601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9846240878105164, + "rewards_train/margins": 0.5176357626914978, + "rewards_train/rejected": -1.5022598505020142, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -101.37261962890625, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -206.70452880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5872620344161987, + "rewards_train/margins": 8.633191227912903, + "rewards_train/rejected": -10.220453262329102, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -20.215232849121094, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -25.946552276611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08402328938245773, + "rewards_train/margins": 1.1106320098042488, + "rewards_train/rejected": -1.1946552991867065, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -11.513972282409668, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -20.179805755615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44514724612236023, + "rewards_train/margins": 0.4540833532810211, + "rewards_train/rejected": -0.8992305994033813, + "step": 970 + }, + { + "epoch": 0.27, + "logps_train/chosen": -121.81426239013672, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -155.715576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.731426239013672, + "rewards_train/margins": 0.3401315212249756, + "rewards_train/rejected": -3.0715577602386475, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -24.63888168334961, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -26.608259201049805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8013882040977478, + "rewards_train/margins": 0.7594377398490906, + "rewards_train/rejected": -1.5608259439468384, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -9.554784774780273, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -23.10331916809082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6617285013198853, + "rewards_train/margins": 1.0579783916473389, + "rewards_train/rejected": -1.7197068929672241, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -78.8794937133789, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -147.0673065185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7629493474960327, + "rewards_train/margins": 4.793781399726868, + "rewards_train/rejected": -6.5567307472229, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -194.29039001464844, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -157.1799774169922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.229039192199707, + "rewards_train/margins": -1.0110414028167725, + "rewards_train/rejected": -3.2179977893829346, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -120.2596664428711, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -96.51042175292969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.525966644287109, + "rewards_train/margins": -1.5249245166778564, + "rewards_train/rejected": -3.001042127609253, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -41.60297775268555, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -50.597347259521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4602977931499481, + "rewards_train/margins": 0.19943693280220032, + "rewards_train/rejected": -0.6597347259521484, + "step": 971 + }, + { + "epoch": 0.27, + "logps_train/chosen": -174.46063232421875, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -192.23660278320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.253936767578125, + "rewards_train/margins": 2.0775970220565796, + "rewards_train/rejected": -1.8236602544784546, + "step": 971 + }, + { + "epoch": 0.27, + "learning_rate": 1.4353303400555457e-06, + "loss": 0.4308, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -82.76585388183594, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -175.2835693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7765854001045227, + "rewards_train/margins": 5.301771819591522, + "rewards_train/rejected": -6.078357219696045, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.6913423538208, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -20.564041137695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8347592353820801, + "rewards_train/margins": 0.6653949022293091, + "rewards_train/rejected": -1.5001541376113892, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -4.122950077056885, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -32.13416290283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2255762666463852, + "rewards_train/margins": 1.59408999979496, + "rewards_train/rejected": -1.8196662664413452, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -33.218502044677734, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -47.758087158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0468502044677734, + "rewards_train/margins": 0.028958559036254883, + "rewards_train/rejected": -1.0758087635040283, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -167.3721923828125, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -187.48191833496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.887219429016113, + "rewards_train/margins": 1.4109725952148438, + "rewards_train/rejected": -8.298192024230957, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -71.53578186035156, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -125.00325012207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7785782217979431, + "rewards_train/margins": 2.171746790409088, + "rewards_train/rejected": -2.9503250122070312, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -179.36721801757812, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -222.19003295898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.336721897125244, + "rewards_train/margins": 5.082281589508057, + "rewards_train/rejected": -7.419003486633301, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -20.897872924804688, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -26.839576721191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6585372686386108, + "rewards_train/margins": 0.1879204511642456, + "rewards_train/rejected": -1.8464577198028564, + "step": 972 + }, + { + "epoch": 0.27, + "logps_train/chosen": -17.45243263244629, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -1.28125, + "logps_train/rejected": -8.59553050994873, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5389932990074158, + "rewards_train/margins": 0.19243478775024414, + "rewards_train/rejected": -0.7314280867576599, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -47.39237976074219, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -40.957069396972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6392379999160767, + "rewards_train/margins": 0.09396898746490479, + "rewards_train/rejected": -1.7332069873809814, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -32.18309020996094, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -34.73479461669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.355808973312378, + "rewards_train/margins": 0.667670488357544, + "rewards_train/rejected": -3.023479461669922, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -245.2091522216797, + "logps_train/ref_chosen": -220.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -223.3220977783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5209152698516846, + "rewards_train/margins": 4.111294507980347, + "rewards_train/rejected": -6.632209777832031, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.983259201049805, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -10.131872177124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5891740918159485, + "rewards_train/margins": 1.2148613333702087, + "rewards_train/rejected": -0.6256872415542603, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -10.822040557861328, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -30.259353637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37907907366752625, + "rewards_train/margins": 0.10935628414154053, + "rewards_train/rejected": -0.4884353578090668, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -143.53627014160156, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -208.07411193847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6036269664764404, + "rewards_train/margins": 3.003784418106079, + "rewards_train/rejected": -5.6074113845825195, + "step": 973 + }, + { + "epoch": 0.27, + "logps_train/chosen": -77.9519271850586, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -90.72283935546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5451927185058594, + "rewards_train/margins": -0.5729087833315134, + "rewards_train/rejected": 0.02771606482565403, + "step": 973 + }, + { + "epoch": 0.27, + "learning_rate": 1.4329471064824143e-06, + "loss": 0.3668, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -142.71163940429688, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -141.64630126953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.071164131164551, + "rewards_train/margins": -0.10653400421142578, + "rewards_train/rejected": -3.964630126953125, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -166.19320678710938, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -181.17051696777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.019320964813232, + "rewards_train/margins": 1.3477306365966797, + "rewards_train/rejected": -7.367051601409912, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -67.58149719238281, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -179.73272705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.333149790763855, + "rewards_train/margins": 7.290123105049133, + "rewards_train/rejected": -8.623272895812988, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -11.249255180358887, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -20.638233184814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6686755418777466, + "rewards_train/margins": 0.8951478004455566, + "rewards_train/rejected": -1.5638233423233032, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.390243530273438, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -97.39309692382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7140243649482727, + "rewards_train/margins": 2.8502854704856873, + "rewards_train/rejected": -3.56430983543396, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -159.96917724609375, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -246.4329376220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.596917748451233, + "rewards_train/margins": 3.4463762044906616, + "rewards_train/rejected": -5.0432939529418945, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -21.701480865478516, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -14.583580017089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4451480805873871, + "rewards_train/margins": 0.42570993304252625, + "rewards_train/rejected": -0.8708580136299133, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -39.24137496948242, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -158.8240509033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5241374969482422, + "rewards_train/margins": 1.2582676410675049, + "rewards_train/rejected": -2.782405138015747, + "step": 974 + }, + { + "epoch": 0.27, + "logps_train/chosen": -9.22149658203125, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -13.881326675415039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31589967012405396, + "rewards_train/margins": 0.7519205212593079, + "rewards_train/rejected": -1.0678201913833618, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.194865226745605, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -22.057846069335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8163615465164185, + "rewards_train/margins": -0.1293269395828247, + "rewards_train/rejected": -0.6870346069335938, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.59676456451416, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -1.234375, + "logps_train/rejected": -20.113168716430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04657354578375816, + "rewards_train/margins": 1.9344529174268246, + "rewards_train/rejected": -1.8878793716430664, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -170.46331787109375, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -153.56385803222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.196331977844238, + "rewards_train/margins": -1.0899462699890137, + "rewards_train/rejected": -4.106385707855225, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -1.1046466827392578, + "logps_train/ref_chosen": -1.8515625, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -14.179855346679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07469158619642258, + "rewards_train/margins": 0.40517713874578476, + "rewards_train/rejected": -0.3304855525493622, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -16.2071533203125, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -33.38883972167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.923840343952179, + "rewards_train/margins": 0.6900436282157898, + "rewards_train/rejected": -1.6138839721679688, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -7.351612091064453, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -11.547314643859863, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48672372102737427, + "rewards_train/margins": 0.06175774335861206, + "rewards_train/rejected": -0.5484814643859863, + "step": 975 + }, + { + "epoch": 0.27, + "logps_train/chosen": -6.5423994064331055, + "logps_train/ref_chosen": -3.46875, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -6.38345193862915, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.30736494064331055, + "rewards_train/margins": -0.004957228899002075, + "rewards_train/rejected": -0.30240771174430847, + "step": 975 + }, + { + "epoch": 0.27, + "learning_rate": 1.4305608427385657e-06, + "loss": 0.4435, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -88.46680450439453, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -208.22833251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.621680498123169, + "rewards_train/margins": 6.651153326034546, + "rewards_train/rejected": -9.272833824157715, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -3.817075490951538, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -8.93031120300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25905129313468933, + "rewards_train/margins": 0.09647983312606812, + "rewards_train/rejected": -0.35553112626075745, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.982831001281738, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -18.453725814819336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29828310012817383, + "rewards_train/margins": 0.8095895051956177, + "rewards_train/rejected": -1.1078726053237915, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -154.50656127929688, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -187.03506469726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.450656414031982, + "rewards_train/margins": 0.9028501510620117, + "rewards_train/rejected": -6.353506565093994, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -22.649131774902344, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -69.96404266357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8711631894111633, + "rewards_train/margins": 1.0502410531044006, + "rewards_train/rejected": -1.921404242515564, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -111.46288299560547, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -123.29169464111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3462882936000824, + "rewards_train/margins": 2.882881313562393, + "rewards_train/rejected": -3.2291696071624756, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -139.9558868408203, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -211.63931274414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6955887079238892, + "rewards_train/margins": 1.2683426141738892, + "rewards_train/rejected": -2.9639313220977783, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -24.579971313476562, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -24.516870498657227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.670497179031372, + "rewards_train/margins": -0.2688101530075073, + "rewards_train/rejected": -1.4016870260238647, + "step": 976 + }, + { + "epoch": 0.27, + "logps_train/chosen": -119.546875, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -136.2816162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2953124940395355, + "rewards_train/margins": 3.0234740674495697, + "rewards_train/rejected": -2.728161573410034, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -43.12601089477539, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -36.63020706176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012398910708725452, + "rewards_train/margins": 2.8129195692017674, + "rewards_train/rejected": -2.800520658493042, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -225.27285766601562, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -225.77410888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.077285766601562, + "rewards_train/margins": 1.150125503540039, + "rewards_train/rejected": -11.227411270141602, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -104.55867004394531, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -109.90487670898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6058670282363892, + "rewards_train/margins": -0.7153793573379517, + "rewards_train/rejected": -0.8904876708984375, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -13.885213851928711, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -26.41385841369629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04272861406207085, + "rewards_train/margins": 1.3153644315898418, + "rewards_train/rejected": -1.272635817527771, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -130.93882751464844, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -122.66324615478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4438828229904175, + "rewards_train/margins": 2.472441792488098, + "rewards_train/rejected": -3.9163246154785156, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -168.63580322265625, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -179.0298309326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.913580417633057, + "rewards_train/margins": -1.210597038269043, + "rewards_train/rejected": -6.702983379364014, + "step": 977 + }, + { + "epoch": 0.27, + "logps_train/chosen": -123.27849578857422, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -157.5941162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.027849555015564, + "rewards_train/margins": 0.7315621376037598, + "rewards_train/rejected": -1.7594116926193237, + "step": 977 + }, + { + "epoch": 0.27, + "learning_rate": 1.4281715655253159e-06, + "loss": 0.4045, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -107.70552062988281, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -141.4385223388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4794479310512543, + "rewards_train/margins": 2.7733002603054047, + "rewards_train/rejected": -2.2938523292541504, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -84.86103057861328, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -132.71426391601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08610305935144424, + "rewards_train/margins": 3.185323379933834, + "rewards_train/rejected": -3.2714264392852783, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -106.11946105957031, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -158.089111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2619460821151733, + "rewards_train/margins": 5.79696524143219, + "rewards_train/rejected": -7.058911323547363, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -93.49357604980469, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -168.43630981445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7993575930595398, + "rewards_train/margins": 4.044273674488068, + "rewards_train/rejected": -4.843631267547607, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -7.088160514831543, + "logps_train/ref_chosen": -1.4921875, + "logps_train/ref_rejected": -3.34375, + "logps_train/rejected": -12.195423126220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5595973134040833, + "rewards_train/margins": 0.3255699872970581, + "rewards_train/rejected": -0.8851673007011414, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -130.65182495117188, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -111.29953002929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2651824951171875, + "rewards_train/margins": 2.964770793914795, + "rewards_train/rejected": -4.229953289031982, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -132.8594970703125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -168.5629425048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08594971150159836, + "rewards_train/margins": 1.8703445866703987, + "rewards_train/rejected": -1.956294298171997, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -17.376361846923828, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -19.09787368774414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.420448660850525, + "rewards_train/margins": 0.1830887794494629, + "rewards_train/rejected": -1.6035374402999878, + "step": 978 + }, + { + "epoch": 0.27, + "logps_train/chosen": -33.72189712524414, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -43.83556365966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5034396648406982, + "rewards_train/margins": 0.8051166534423828, + "rewards_train/rejected": -3.308556318283081, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.107187271118164, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -18.43268585205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5169687271118164, + "rewards_train/margins": 1.1106748580932617, + "rewards_train/rejected": -1.6276435852050781, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -28.43289566040039, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -28.878890991210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08171043545007706, + "rewards_train/margins": 1.7695995345711708, + "rewards_train/rejected": -1.6878890991210938, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -14.667075157165527, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -26.455245971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9042075276374817, + "rewards_train/margins": 1.4272546172142029, + "rewards_train/rejected": -2.3314621448516846, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -117.95562744140625, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -198.27102661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5955628156661987, + "rewards_train/margins": 6.981539845466614, + "rewards_train/rejected": -8.577102661132812, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -161.07125854492188, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -185.90179443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.857125759124756, + "rewards_train/margins": 0.9830536842346191, + "rewards_train/rejected": -5.840179443359375, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -12.31872844696045, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -12.228857040405273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49124786257743835, + "rewards_train/margins": 0.30038782954216003, + "rewards_train/rejected": -0.7916356921195984, + "step": 979 + }, + { + "epoch": 0.27, + "logps_train/chosen": -272.7845458984375, + "logps_train/ref_chosen": -222.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -168.6642608642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.078454494476318, + "rewards_train/margins": 0.1879715919494629, + "rewards_train/rejected": -5.266426086425781, + "step": 979 + }, + { + "epoch": 0.27, + "learning_rate": 1.4257792915650725e-06, + "loss": 0.248, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -114.01493835449219, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -184.89450073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.401493787765503, + "rewards_train/margins": 4.337956190109253, + "rewards_train/rejected": -6.739449977874756, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -141.7120361328125, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -242.41049194335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5212037563323975, + "rewards_train/margins": 5.71984601020813, + "rewards_train/rejected": -8.241049766540527, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -87.12651824951172, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -161.39315795898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16265182197093964, + "rewards_train/margins": 6.476664260029793, + "rewards_train/rejected": -6.639316082000732, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -6.9721479415893555, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -20.94315528869629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2722148001194, + "rewards_train/margins": 0.6658507287502289, + "rewards_train/rejected": -0.9380655288696289, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.58578109741211, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -18.106998443603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09142189472913742, + "rewards_train/margins": 0.002121739089488983, + "rewards_train/rejected": 0.08930015563964844, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -177.17442321777344, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -214.1710205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.967442512512207, + "rewards_train/margins": 4.149660110473633, + "rewards_train/rejected": -10.11710262298584, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -21.479623794555664, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -54.38560104370117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3729623854160309, + "rewards_train/margins": 0.3155977427959442, + "rewards_train/rejected": -0.6885601282119751, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -111.54215240478516, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -206.2852783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.804215431213379, + "rewards_train/margins": 4.724312782287598, + "rewards_train/rejected": -9.528528213500977, + "step": 980 + }, + { + "epoch": 0.27, + "logps_train/chosen": -97.05209350585938, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -71.91740417480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8052093386650085, + "rewards_train/margins": 0.16153109073638916, + "rewards_train/rejected": -0.9667404294013977, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -87.02364349365234, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -146.54238891601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.202364444732666, + "rewards_train/margins": 1.7518744468688965, + "rewards_train/rejected": -3.9542388916015625, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -52.661048889160156, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -65.80783081054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18389511108398438, + "rewards_train/margins": 0.7396782040596008, + "rewards_train/rejected": -0.5557830929756165, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -188.21714782714844, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -248.36724853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.821714878082275, + "rewards_train/margins": 0.31501007080078125, + "rewards_train/rejected": -5.136724948883057, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -17.397321701049805, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -20.21941375732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9522321820259094, + "rewards_train/margins": 0.7415842413902283, + "rewards_train/rejected": -1.6938164234161377, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -15.810248374938965, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -94.09871673583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2544623613357544, + "rewards_train/margins": 1.2054094076156616, + "rewards_train/rejected": -2.459871768951416, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -102.50291442871094, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -138.07635498046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2002915143966675, + "rewards_train/margins": 4.357344269752502, + "rewards_train/rejected": -5.55763578414917, + "step": 981 + }, + { + "epoch": 0.27, + "logps_train/chosen": -119.63358306884766, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -105.02388000488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.11335825920105, + "rewards_train/margins": -1.010970115661621, + "rewards_train/rejected": -2.1023881435394287, + "step": 981 + }, + { + "epoch": 0.27, + "learning_rate": 1.423384037601217e-06, + "loss": 0.3369, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -9.838424682617188, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -30.048215866088867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06509246677160263, + "rewards_train/margins": 1.139729119837284, + "rewards_train/rejected": -1.2048215866088867, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -3.3449244499206543, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -0.2294921875, + "logps_train/rejected": -0.09585113823413849, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04855494573712349, + "rewards_train/margins": -0.061919051222503185, + "rewards_train/rejected": 0.013364105485379696, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -219.21463012695312, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -193.51681518554688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.221463203430176, + "rewards_train/margins": -1.369781494140625, + "rewards_train/rejected": -4.851681709289551, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -2.3379299640655518, + "logps_train/ref_chosen": -1.3671875, + "logps_train/ref_rejected": -1.140625, + "logps_train/rejected": -5.810132026672363, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0970742478966713, + "rewards_train/margins": 0.369876466691494, + "rewards_train/rejected": -0.4669507145881653, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -115.22587585449219, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -138.67398071289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3725876808166504, + "rewards_train/margins": 4.644810676574707, + "rewards_train/rejected": -7.017398357391357, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -107.8227767944336, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -179.2126007080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.782277822494507, + "rewards_train/margins": 2.4889824390411377, + "rewards_train/rejected": -5.2712602615356445, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -16.27547836303711, + "logps_train/ref_chosen": -1.140625, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -17.45362663269043, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.513485312461853, + "rewards_train/margins": -0.31187260150909424, + "rewards_train/rejected": -1.2016127109527588, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -63.901039123535156, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -93.34699249267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16510391235351562, + "rewards_train/margins": 2.469595432281494, + "rewards_train/rejected": -2.6346993446350098, + "step": 982 + }, + { + "epoch": 0.27, + "logps_train/chosen": -34.21928405761719, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -29.120681762695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8781784772872925, + "rewards_train/margins": -0.10361027717590332, + "rewards_train/rejected": -1.7745682001113892, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -8.508947372436523, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -10.826335906982422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.49464473128318787, + "rewards_train/margins": -0.0807611346244812, + "rewards_train/rejected": -0.41388359665870667, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -194.03097534179688, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -211.43502807617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.303097724914551, + "rewards_train/margins": 2.0404052734375, + "rewards_train/rejected": -9.34350299835205, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -132.50537109375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -171.80882263183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.650537133216858, + "rewards_train/margins": 5.680345416069031, + "rewards_train/rejected": -7.330882549285889, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -42.08355712890625, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -25.159276962280273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33335572481155396, + "rewards_train/margins": 1.1825719475746155, + "rewards_train/rejected": -1.5159276723861694, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -103.64736938476562, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -144.3763427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9147369265556335, + "rewards_train/margins": 1.4228973984718323, + "rewards_train/rejected": -2.337634325027466, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -146.43338012695312, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -158.24407958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6433380842208862, + "rewards_train/margins": 1.7310699224472046, + "rewards_train/rejected": -3.374408006668091, + "step": 983 + }, + { + "epoch": 0.27, + "logps_train/chosen": -21.111801147460938, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -33.2676887512207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0424301624298096, + "rewards_train/margins": 1.2343387603759766, + "rewards_train/rejected": -2.276768922805786, + "step": 983 + }, + { + "epoch": 0.28, + "learning_rate": 1.4209858203979872e-06, + "loss": 0.4165, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -68.8004150390625, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -172.68960571289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.455041527748108, + "rewards_train/margins": 3.263919234275818, + "rewards_train/rejected": -4.718960762023926, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -3.5810787677764893, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -1.6328125, + "logps_train/rejected": -1.6546450853347778, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23232662677764893, + "rewards_train/margins": -0.23014336824417114, + "rewards_train/rejected": -0.002183258533477783, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -146.3258819580078, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -257.1701965332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.732588291168213, + "rewards_train/margins": 8.8844313621521, + "rewards_train/rejected": -12.617019653320312, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -14.067228317260742, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -11.656004905700684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1004728302359581, + "rewards_train/margins": 0.6916901841759682, + "rewards_train/rejected": -0.7921630144119263, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -32.43286895751953, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -15.459290504455566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08171310275793076, + "rewards_train/margins": 0.6088921651244164, + "rewards_train/rejected": -0.5271790623664856, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -162.29055786132812, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -181.425537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5790557861328125, + "rewards_train/margins": 0.8634982109069824, + "rewards_train/rejected": -4.442553997039795, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -8.027328491210938, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -24.296646118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4152328670024872, + "rewards_train/margins": 1.3550567924976349, + "rewards_train/rejected": -1.770289659500122, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -147.90223693847656, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -158.00973510742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4902237057685852, + "rewards_train/margins": 1.2107498049736023, + "rewards_train/rejected": -1.7009735107421875, + "step": 984 + }, + { + "epoch": 0.28, + "logps_train/chosen": -133.36941528320312, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -229.8367919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2869415283203125, + "rewards_train/margins": 6.596737861633301, + "rewards_train/rejected": -9.883679389953613, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -80.23583221435547, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -127.4541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1735832244157791, + "rewards_train/margins": 3.8218269795179367, + "rewards_train/rejected": -3.995410203933716, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -14.445064544677734, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -65.06849670410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6382564902305603, + "rewards_train/margins": 1.118593156337738, + "rewards_train/rejected": -1.7568496465682983, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -127.59990692138672, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -74.3353500366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6599907875061035, + "rewards_train/margins": 1.5485444068908691, + "rewards_train/rejected": -4.208535194396973, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -12.383523941040039, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -55.47039031982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2008523941040039, + "rewards_train/margins": 3.1336867809295654, + "rewards_train/rejected": -3.3345391750335693, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -109.97602844238281, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -117.30146026611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.797602891921997, + "rewards_train/margins": 0.7825431823730469, + "rewards_train/rejected": -2.580146074295044, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -2.561610221862793, + "logps_train/ref_chosen": -1.28125, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -76.21955871582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1280360221862793, + "rewards_train/margins": 1.6689199209213257, + "rewards_train/rejected": -1.796955943107605, + "step": 985 + }, + { + "epoch": 0.28, + "logps_train/chosen": -136.16845703125, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -134.9022216796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.966845691204071, + "rewards_train/margins": -0.1266235113143921, + "rewards_train/rejected": -0.840222179889679, + "step": 985 + }, + { + "epoch": 0.28, + "learning_rate": 1.4185846567403609e-06, + "loss": 0.2739, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.410865306854248, + "logps_train/ref_chosen": -1.1796875, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -12.6383056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5231177806854248, + "rewards_train/margins": 0.0969628095626831, + "rewards_train/rejected": -0.6200805902481079, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -105.34245300292969, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -216.36712646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.184245303273201, + "rewards_train/margins": 8.152467533946037, + "rewards_train/rejected": -8.336712837219238, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -175.4447479248047, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -196.45437622070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9444748163223267, + "rewards_train/margins": 0.9009629487991333, + "rewards_train/rejected": -2.84543776512146, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -20.767478942871094, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -31.290918350219727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1454979181289673, + "rewards_train/margins": 0.9023438692092896, + "rewards_train/rejected": -2.047841787338257, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -124.52823638916016, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -128.69210815429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9528236389160156, + "rewards_train/margins": 3.316387176513672, + "rewards_train/rejected": -4.2692108154296875, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -119.73377990722656, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -135.1619873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4766220152378082, + "rewards_train/margins": 2.6928206980228424, + "rewards_train/rejected": -2.216198682785034, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.070117950439453, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -23.396244049072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9320117831230164, + "rewards_train/margins": 0.857612669467926, + "rewards_train/rejected": -1.7896244525909424, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -77.26861572265625, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -148.0410919189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.276861548423767, + "rewards_train/margins": 0.7272475957870483, + "rewards_train/rejected": -2.0041091442108154, + "step": 986 + }, + { + "epoch": 0.28, + "logps_train/chosen": -29.698776245117188, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -44.46625518798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.201127767562866, + "rewards_train/margins": 0.4829978942871094, + "rewards_train/rejected": -2.6841256618499756, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": -8.966728210449219, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -18.050460815429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2404228299856186, + "rewards_train/margins": 0.5958732515573502, + "rewards_train/rejected": -0.8362960815429688, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": -134.43606567382812, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -135.05796813964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2436065673828125, + "rewards_train/margins": -0.08780980110168457, + "rewards_train/rejected": -3.155796766281128, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": -48.408687591552734, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -32.6325798034668, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.140868902206421, + "rewards_train/margins": -0.2166733741760254, + "rewards_train/rejected": -2.9241955280303955, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": -1.580261468887329, + "logps_train/ref_chosen": -0.26953125, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -7.243194580078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1310730278491974, + "rewards_train/margins": -0.13487856998108327, + "rewards_train/rejected": 0.003805542131885886, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": -133.73072814941406, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -147.19654846191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7730729579925537, + "rewards_train/margins": 0.5965821743011475, + "rewards_train/rejected": -4.369655132293701, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": -46.113922119140625, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -24.423709869384766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4613922834396362, + "rewards_train/margins": -1.319021299481392, + "rewards_train/rejected": -0.14237098395824432, + "step": 987 + }, + { + "epoch": 0.28, + "logps_train/chosen": 0.0, + "logps_train/ref_chosen": 0.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -91.05096435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0, + "rewards_train/margins": 1.0550965070724487, + "rewards_train/rejected": -1.0550965070724487, + "step": 987 + }, + { + "epoch": 0.28, + "learning_rate": 1.4161805634339384e-06, + "loss": 0.481, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -30.27353286743164, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -1.2421875, + "logps_train/rejected": -6.102953910827637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40235328674316406, + "rewards_train/margins": 0.08372336626052856, + "rewards_train/rejected": -0.4860766530036926, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -202.314208984375, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -196.06076049804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.031421184539795, + "rewards_train/margins": -0.07534503936767578, + "rewards_train/rejected": -6.956076145172119, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -52.58677291870117, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -14.324795722961426, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.783677339553833, + "rewards_train/margins": -0.8011977672576904, + "rewards_train/rejected": -0.9824795722961426, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -92.97587585449219, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -109.26185607910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05241241678595543, + "rewards_train/margins": 0.7785980366170406, + "rewards_train/rejected": -0.7261856198310852, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -46.956459045410156, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -31.080310821533203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0956459045410156, + "rewards_train/margins": -0.6626148223876953, + "rewards_train/rejected": -1.4330310821533203, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -3.9432270526885986, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -9.0086669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0974477082490921, + "rewards_train/margins": 0.23466898500919342, + "rewards_train/rejected": -0.3321166932582855, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -12.819339752197266, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -15.790546417236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14931602776050568, + "rewards_train/margins": 0.7846206575632095, + "rewards_train/rejected": -0.6353046298027039, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -162.84146118164062, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -221.8822021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2841460704803467, + "rewards_train/margins": 6.104074716567993, + "rewards_train/rejected": -9.38822078704834, + "step": 988 + }, + { + "epoch": 0.28, + "logps_train/chosen": -135.0457305908203, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -131.79705810546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8545730113983154, + "rewards_train/margins": -0.27486705780029297, + "rewards_train/rejected": -2.5797059535980225, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -47.2027587890625, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -41.80957794189453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.59527587890625, + "rewards_train/margins": -0.27681803703308105, + "rewards_train/rejected": -2.318457841873169, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -0.5080298185348511, + "logps_train/ref_chosen": -0.306640625, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -12.408989906311035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020138919353485107, + "rewards_train/margins": 0.6520100831985474, + "rewards_train/rejected": -0.6721490025520325, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -17.276123046875, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -24.4360408782959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8026123046875, + "rewards_train/margins": 1.0253667831420898, + "rewards_train/rejected": -1.8279790878295898, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -7.70472526550293, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -24.12239646911621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27047252655029297, + "rewards_train/margins": 0.835517168045044, + "rewards_train/rejected": -1.105989694595337, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -118.50566101074219, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -208.37643432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8505661487579346, + "rewards_train/margins": 4.487077474594116, + "rewards_train/rejected": -7.337643623352051, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.469890594482422, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -18.55629539489746, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9594890475273132, + "rewards_train/margins": 0.05864053964614868, + "rewards_train/rejected": -1.018129587173462, + "step": 989 + }, + { + "epoch": 0.28, + "logps_train/chosen": -22.453243255615234, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -25.03506851196289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7703243494033813, + "rewards_train/margins": -0.04181748628616333, + "rewards_train/rejected": -0.728506863117218, + "step": 989 + }, + { + "epoch": 0.28, + "learning_rate": 1.413773557304823e-06, + "loss": 0.5706, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -105.2896728515625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -195.89959716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.87896728515625, + "rewards_train/margins": 7.810992240905762, + "rewards_train/rejected": -8.689959526062012, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -40.69993591308594, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -81.38411712646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0574935674667358, + "rewards_train/margins": 2.6809180974960327, + "rewards_train/rejected": -3.7384116649627686, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -11.427133560180664, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -20.61969757080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7755258679389954, + "rewards_train/margins": 0.5270689129829407, + "rewards_train/rejected": -1.302594780921936, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -16.36266326904297, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -12.733781814575195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5300163626670837, + "rewards_train/margins": 0.3730493187904358, + "rewards_train/rejected": -0.9030656814575195, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -140.6448974609375, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -150.60061645507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2644898891448975, + "rewards_train/margins": 0.595571756362915, + "rewards_train/rejected": -3.8600616455078125, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -144.14724731445312, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -170.86441040039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0147247314453125, + "rewards_train/margins": 1.7217164039611816, + "rewards_train/rejected": -4.736441135406494, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -3.6775412559509277, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -21.247373580932617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06931662559509277, + "rewards_train/margins": 0.9366707801818848, + "rewards_train/rejected": -1.0059874057769775, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -177.10650634765625, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -191.0803985595703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.410650730133057, + "rewards_train/margins": -1.3026108741760254, + "rewards_train/rejected": -3.1080398559570312, + "step": 990 + }, + { + "epoch": 0.28, + "logps_train/chosen": -10.241037368774414, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -23.768095016479492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6678537726402283, + "rewards_train/margins": 1.012080729007721, + "rewards_train/rejected": -1.6799345016479492, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -128.44976806640625, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -101.51467895507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6449768543243408, + "rewards_train/margins": -0.9935089349746704, + "rewards_train/rejected": -0.6514679193496704, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -146.03407287597656, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -157.92689514160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8034073114395142, + "rewards_train/margins": 3.5892823934555054, + "rewards_train/rejected": -4.3926897048950195, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -148.11123657226562, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -144.77862548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4111236333847046, + "rewards_train/margins": 2.4167388677597046, + "rewards_train/rejected": -3.827862501144409, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -35.52492141723633, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -51.887359619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5274921655654907, + "rewards_train/margins": 2.0362437963485718, + "rewards_train/rejected": -3.5637359619140625, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -139.18711853027344, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -128.72488403320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3687119483947754, + "rewards_train/margins": -0.4962235689163208, + "rewards_train/rejected": -1.8724883794784546, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -30.691097259521484, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -24.624523162841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6753597259521484, + "rewards_train/margins": 0.20584261417388916, + "rewards_train/rejected": -1.8812023401260376, + "step": 991 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.214031219482422, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -6.828283786773682, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8651531338691711, + "rewards_train/margins": -0.5416997373104095, + "rewards_train/rejected": -0.3234533965587616, + "step": 991 + }, + { + "epoch": 0.28, + "learning_rate": 1.4113636551995063e-06, + "loss": 0.497, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -103.89186096191406, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -116.02444458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.139186143875122, + "rewards_train/margins": 1.813258409500122, + "rewards_train/rejected": -2.952444553375244, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -157.78866577148438, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -198.26571655273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6788665652275085, + "rewards_train/margins": 1.5477052330970764, + "rewards_train/rejected": -2.226571798324585, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -8.692447662353516, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -45.62568664550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45986977219581604, + "rewards_train/margins": 0.3276989161968231, + "rewards_train/rejected": -0.7875686883926392, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -7.743435382843018, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -61.851158142089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15246854722499847, + "rewards_train/margins": 2.5576473623514175, + "rewards_train/rejected": -2.710115909576416, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -86.86936950683594, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -125.14767456054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6869369745254517, + "rewards_train/margins": 0.6778305768966675, + "rewards_train/rejected": -2.364767551422119, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -267.06298828125, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -230.2032012939453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.10629940032959, + "rewards_train/margins": -0.18597888946533203, + "rewards_train/rejected": -8.920320510864258, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -338.504638671875, + "logps_train/ref_chosen": -237.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -278.2575988769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.150464057922363, + "rewards_train/margins": 0.4752960205078125, + "rewards_train/rejected": -10.625760078430176, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -29.963973999023438, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -67.08511352539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9963974356651306, + "rewards_train/margins": 1.8621140122413635, + "rewards_train/rejected": -2.858511447906494, + "step": 992 + }, + { + "epoch": 0.28, + "logps_train/chosen": -114.9869384765625, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -205.7667236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.698693871498108, + "rewards_train/margins": 3.9779785871505737, + "rewards_train/rejected": -5.676672458648682, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -143.08045959472656, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -153.8558807373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7580459117889404, + "rewards_train/margins": 1.0775420665740967, + "rewards_train/rejected": -4.835587978363037, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -71.80183410644531, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -50.5, + "logps_train/rejected": -54.47197723388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3301834166049957, + "rewards_train/margins": 0.06701430678367615, + "rewards_train/rejected": -0.3971977233886719, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -115.64505004882812, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -148.82867431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4645050764083862, + "rewards_train/margins": 0.21836233139038086, + "rewards_train/rejected": -1.682867407798767, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -41.191741943359375, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -134.9463348388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19417420029640198, + "rewards_train/margins": 2.4504593312740326, + "rewards_train/rejected": -2.6446335315704346, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -31.439340591430664, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -60.01383590698242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3814340829849243, + "rewards_train/margins": 2.619949698448181, + "rewards_train/rejected": -4.0013837814331055, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -25.139114379882812, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -34.78961944580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4639114141464233, + "rewards_train/margins": 0.1775505542755127, + "rewards_train/rejected": -1.641461968421936, + "step": 993 + }, + { + "epoch": 0.28, + "logps_train/chosen": -183.97836303710938, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -192.068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.7978363037109375, + "rewards_train/margins": 2.258999824523926, + "rewards_train/rejected": -7.056836128234863, + "step": 993 + }, + { + "epoch": 0.28, + "learning_rate": 1.4089508739847478e-06, + "loss": 0.3259, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -124.02742767333984, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -195.19235229492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9027428030967712, + "rewards_train/margins": 4.016492426395416, + "rewards_train/rejected": -4.9192352294921875, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -11.856895446777344, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -1.9765625, + "logps_train/rejected": -12.155105590820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3419395387172699, + "rewards_train/margins": 0.6759147942066193, + "rewards_train/rejected": -1.0178543329238892, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -5.671245574951172, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -10.255378723144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35306206345558167, + "rewards_train/margins": 0.10372582077980042, + "rewards_train/rejected": -0.4567878842353821, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -8.084542274475098, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -7.369350910186768, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5959542393684387, + "rewards_train/margins": -0.31214413046836853, + "rewards_train/rejected": -0.2838101089000702, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -124.6953125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -169.659423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.719531297683716, + "rewards_train/margins": 0.6464111804962158, + "rewards_train/rejected": -3.3659424781799316, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -127.59561157226562, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -157.17620849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4095611572265625, + "rewards_train/margins": 2.0080597400665283, + "rewards_train/rejected": -3.417620897293091, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -189.30577087402344, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -229.67745971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.630577087402344, + "rewards_train/margins": 5.637168884277344, + "rewards_train/rejected": -11.267745971679688, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -113.25749206542969, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -211.22994995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.875749349594116, + "rewards_train/margins": 3.047245740890503, + "rewards_train/rejected": -5.922995090484619, + "step": 994 + }, + { + "epoch": 0.28, + "logps_train/chosen": -92.96533966064453, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -170.38394165039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3965339660644531, + "rewards_train/margins": 1.3418601751327515, + "rewards_train/rejected": -1.7383941411972046, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -116.32064819335938, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -145.85232543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.282064825296402, + "rewards_train/margins": 5.553167909383774, + "rewards_train/rejected": -5.835232734680176, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -19.45730972290039, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -26.46978187561035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.883230984210968, + "rewards_train/margins": 0.5762472748756409, + "rewards_train/rejected": -1.4594782590866089, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -141.3959503173828, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -85.15345764160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1895949840545654, + "rewards_train/margins": -0.5242490768432617, + "rewards_train/rejected": -2.6653459072113037, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.32301139831543, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -37.736572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5760511755943298, + "rewards_train/margins": 1.4226060509681702, + "rewards_train/rejected": -1.9986572265625, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -152.72579956054688, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -176.76321411132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.572580099105835, + "rewards_train/margins": 3.603741407394409, + "rewards_train/rejected": -7.176321506500244, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -10.500885009765625, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -25.617908477783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2625885009765625, + "rewards_train/margins": 1.2242023944854736, + "rewards_train/rejected": -1.4867908954620361, + "step": 995 + }, + { + "epoch": 0.28, + "logps_train/chosen": -2.0973353385925293, + "logps_train/ref_chosen": -0.96875, + "logps_train/ref_rejected": -0.96875, + "logps_train/rejected": -2.2152464389801025, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11285853385925293, + "rewards_train/margins": 0.011791110038757324, + "rewards_train/rejected": -0.12464964389801025, + "step": 995 + }, + { + "epoch": 0.28, + "learning_rate": 1.406535230547458e-06, + "loss": 0.3368, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -92.12411499023438, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -120.55757904052734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4624115228652954, + "rewards_train/margins": -0.30665361881256104, + "rewards_train/rejected": -1.1557579040527344, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.76093292236328, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -17.840904235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1510932892560959, + "rewards_train/margins": 0.43924714624881744, + "rewards_train/rejected": -0.5903404355049133, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -10.555195808410645, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -26.91640853881836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31948041915893555, + "rewards_train/margins": 2.2048712968826294, + "rewards_train/rejected": -1.8853908777236938, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -172.14312744140625, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -217.9627685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.414312839508057, + "rewards_train/margins": 3.3819642066955566, + "rewards_train/rejected": -7.796277046203613, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -41.16693115234375, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -160.28225708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9166931509971619, + "rewards_train/margins": 6.161532461643219, + "rewards_train/rejected": -7.078225612640381, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -39.71291732788086, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -39.07417678833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.596291720867157, + "rewards_train/margins": 2.601750910282135, + "rewards_train/rejected": -3.198042631149292, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -154.33631896972656, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -191.6280975341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.083631992340088, + "rewards_train/margins": 0.8291778564453125, + "rewards_train/rejected": -6.9128098487854, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -48.379730224609375, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -161.04136657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6379730701446533, + "rewards_train/margins": 4.01616358757019, + "rewards_train/rejected": -5.654136657714844, + "step": 996 + }, + { + "epoch": 0.28, + "logps_train/chosen": -145.63877868652344, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -231.63589477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.063877820968628, + "rewards_train/margins": 2.399711847305298, + "rewards_train/rejected": -5.463589668273926, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -108.3335189819336, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -107.04740142822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1833518743515015, + "rewards_train/margins": 2.7213882207870483, + "rewards_train/rejected": -3.90474009513855, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -14.732987403869629, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -22.50761604309082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05454874038696289, + "rewards_train/margins": 1.5368379354476929, + "rewards_train/rejected": -1.5913866758346558, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -101.03634643554688, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -185.81761169433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3536347150802612, + "rewards_train/margins": 4.228126645088196, + "rewards_train/rejected": -5.581761360168457, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -116.58143615722656, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -160.71568298339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8081436157226562, + "rewards_train/margins": 2.7134246826171875, + "rewards_train/rejected": -4.521568298339844, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -94.06939697265625, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -68.47862243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8569397330284119, + "rewards_train/margins": 3.165922701358795, + "rewards_train/rejected": -4.022862434387207, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -32.788047790527344, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -14.165984153747559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14130477607250214, + "rewards_train/margins": 0.8565436750650406, + "rewards_train/rejected": -0.9978484511375427, + "step": 997 + }, + { + "epoch": 0.28, + "logps_train/chosen": -102.43412780761719, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -99.74266052246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19341278076171875, + "rewards_train/margins": 0.8808532953262329, + "rewards_train/rejected": -1.0742660760879517, + "step": 997 + }, + { + "epoch": 0.28, + "learning_rate": 1.4041167417945793e-06, + "loss": 0.1945, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.106284141540527, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -16.043651580810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10125341266393661, + "rewards_train/margins": 0.7437367811799049, + "rewards_train/rejected": -0.8449901938438416, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -126.1972427368164, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -186.62347412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9197242856025696, + "rewards_train/margins": 2.1426231265068054, + "rewards_train/rejected": -3.062347412109375, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -14.54608154296875, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -25.90501594543457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.104608178138733, + "rewards_train/margins": 0.8858934640884399, + "rewards_train/rejected": -1.9905016422271729, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -24.647315979003906, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -56.2056884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4272316098213196, + "rewards_train/margins": 3.193337380886078, + "rewards_train/rejected": -3.6205689907073975, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -155.929931640625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -200.05670166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3429932594299316, + "rewards_train/margins": 4.212677001953125, + "rewards_train/rejected": -7.555670261383057, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -2.705747127532959, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -18.3143253326416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008074712939560413, + "rewards_train/margins": 1.4827328203245997, + "rewards_train/rejected": -1.4908075332641602, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -138.96759033203125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -144.82493591308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9967591762542725, + "rewards_train/margins": 3.735734701156616, + "rewards_train/rejected": -6.732493877410889, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -150.8914794921875, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -231.85189819335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7891480922698975, + "rewards_train/margins": 4.896041631698608, + "rewards_train/rejected": -7.685189723968506, + "step": 998 + }, + { + "epoch": 0.28, + "logps_train/chosen": -99.03587341308594, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -100.58941650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3535873889923096, + "rewards_train/margins": 0.25535428524017334, + "rewards_train/rejected": -1.608941674232483, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -193.861572265625, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -255.75717163085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.386157274246216, + "rewards_train/margins": 2.4895598888397217, + "rewards_train/rejected": -5.8757171630859375, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.535051345825195, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -24.743064880371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6347551345825195, + "rewards_train/margins": 0.1395513415336609, + "rewards_train/rejected": -0.7743064761161804, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -41.01601028442383, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -85.9673843383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5016010999679565, + "rewards_train/margins": 2.9201375246047974, + "rewards_train/rejected": -4.421738624572754, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -146.8496551513672, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -222.82958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4849655330181122, + "rewards_train/margins": 5.097993642091751, + "rewards_train/rejected": -5.582959175109863, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -94.33181762695312, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -94.13973999023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.36681824922561646, + "rewards_train/margins": 1.080792248249054, + "rewards_train/rejected": -0.7139739990234375, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -11.46153736114502, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -40.17431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.266346275806427, + "rewards_train/margins": 2.558777868747711, + "rewards_train/rejected": -2.292431592941284, + "step": 999 + }, + { + "epoch": 0.28, + "logps_train/chosen": -0.9212051630020142, + "logps_train/ref_chosen": -0.48046875, + "logps_train/ref_rejected": -2.015625, + "logps_train/rejected": -6.066527843475342, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.044073641300201416, + "rewards_train/margins": 0.3610166609287262, + "rewards_train/rejected": -0.4050903022289276, + "step": 999 + }, + { + "epoch": 0.28, + "learning_rate": 1.4016954246529694e-06, + "loss": 0.2105, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -151.2151336669922, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -222.72056579589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.621513366699219, + "rewards_train/margins": 2.8505430221557617, + "rewards_train/rejected": -8.47205638885498, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -204.32907104492188, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -183.69979858398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.9329071044921875, + "rewards_train/margins": -0.012927055358886719, + "rewards_train/rejected": -5.919980049133301, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -23.099401473999023, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -12.683743476867676, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6599401831626892, + "rewards_train/margins": -0.4728158265352249, + "rewards_train/rejected": -0.1871243566274643, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -74.28819274902344, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -37.48817825317383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.003819227218628, + "rewards_train/margins": 0.8137485980987549, + "rewards_train/rejected": -2.817567825317383, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.366300106048584, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -9.277545928955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2821199893951416, + "rewards_train/margins": 0.7911245822906494, + "rewards_train/rejected": -0.5090045928955078, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -27.61574363708496, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -31.705276489257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39907437562942505, + "rewards_train/margins": 1.9839534163475037, + "rewards_train/rejected": -2.3830277919769287, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -7.457093238830566, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -9.467443466186523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2613343298435211, + "rewards_train/margins": 0.4213475286960602, + "rewards_train/rejected": -0.6826818585395813, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -13.453268051147461, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -26.04230308532715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15157680213451385, + "rewards_train/margins": 1.296403482556343, + "rewards_train/rejected": -1.447980284690857, + "step": 1000 + }, + { + "epoch": 0.28, + "logps_train/chosen": -161.18600463867188, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -217.90960693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0186004638671875, + "rewards_train/margins": 4.672360420227051, + "rewards_train/rejected": -8.690960884094238, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -64.12748718261719, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -39.611412048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7372512817382812, + "rewards_train/margins": 2.4108924865722656, + "rewards_train/rejected": -1.6736412048339844, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -25.18459701538086, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -46.82707977294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2934597134590149, + "rewards_train/margins": 1.3642483353614807, + "rewards_train/rejected": -1.6577080488204956, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -133.763671875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -200.4988250732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.226367235183716, + "rewards_train/margins": 6.473515272140503, + "rewards_train/rejected": -8.699882507324219, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -34.980674743652344, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -33.844764709472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8480675220489502, + "rewards_train/margins": 0.33015894889831543, + "rewards_train/rejected": -2.1782264709472656, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -155.3687286376953, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -243.35028076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.186872959136963, + "rewards_train/margins": 7.748155117034912, + "rewards_train/rejected": -12.935028076171875, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -233.93592834472656, + "logps_train/ref_chosen": -208.0, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -33.14421081542969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.593592882156372, + "rewards_train/margins": -0.8604217767715454, + "rewards_train/rejected": -1.7331711053848267, + "step": 1001 + }, + { + "epoch": 0.28, + "logps_train/chosen": -135.0370635986328, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -163.41310119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7537063360214233, + "rewards_train/margins": 4.537603974342346, + "rewards_train/rejected": -6.2913103103637695, + "step": 1001 + }, + { + "epoch": 0.28, + "learning_rate": 1.3992712960692807e-06, + "loss": 0.3395, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -59.95527267456055, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -79.12066650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19552727043628693, + "rewards_train/margins": 4.316539570689201, + "rewards_train/rejected": -4.512066841125488, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -9.002483367919922, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -48.27473831176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45337334275245667, + "rewards_train/margins": 2.8491004407405853, + "rewards_train/rejected": -3.302473783493042, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -193.40325927734375, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -318.0, + "logps_train/rejected": -363.22052001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.740325927734375, + "rewards_train/margins": 0.7817263603210449, + "rewards_train/rejected": -4.52205228805542, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -112.52693176269531, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -164.44114685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2026931792497635, + "rewards_train/margins": 4.441421791911125, + "rewards_train/rejected": -4.644114971160889, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -12.793790817260742, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -46.79651641845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6325041055679321, + "rewards_train/margins": 1.084647536277771, + "rewards_train/rejected": -1.7171516418457031, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -88.68727111816406, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -125.01569366455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.293727159500122, + "rewards_train/margins": 1.307842493057251, + "rewards_train/rejected": -4.601569652557373, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -219.19229125976562, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -204.7289581298828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.219229698181152, + "rewards_train/margins": -0.3963336944580078, + "rewards_train/rejected": -9.822896003723145, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -104.7376708984375, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -155.14968872070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.723767042160034, + "rewards_train/margins": 2.44120192527771, + "rewards_train/rejected": -5.164968967437744, + "step": 1002 + }, + { + "epoch": 0.28, + "logps_train/chosen": -150.72691345214844, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -135.5878143310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.872691631317139, + "rewards_train/margins": -2.8139102458953857, + "rewards_train/rejected": -3.058781385421753, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.884063720703125, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -2.421875, + "logps_train/rejected": -13.773017883300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.450906366109848, + "rewards_train/margins": 0.684207946062088, + "rewards_train/rejected": -1.135114312171936, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -9.249834060668945, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -25.483394622802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16560840606689453, + "rewards_train/margins": 0.5202310681343079, + "rewards_train/rejected": -0.6858394742012024, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -50.939476013183594, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -39.555267333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.668947696685791, + "rewards_train/margins": 0.09907913208007812, + "rewards_train/rejected": -2.768026828765869, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -3.810110569000244, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -13.39385986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03257355839014053, + "rewards_train/margins": 0.48806246370077133, + "rewards_train/rejected": -0.5206360220909119, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -7.974039077758789, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -30.497055053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5755289196968079, + "rewards_train/margins": 0.7616766095161438, + "rewards_train/rejected": -1.3372055292129517, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -47.42558670043945, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -89.30326843261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13244132697582245, + "rewards_train/margins": 2.162768170237541, + "rewards_train/rejected": -2.0303268432617188, + "step": 1003 + }, + { + "epoch": 0.28, + "logps_train/chosen": -12.355047225952148, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -8.142221450805664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1855047196149826, + "rewards_train/margins": 0.12246744334697723, + "rewards_train/rejected": -0.30797216296195984, + "step": 1003 + }, + { + "epoch": 0.28, + "learning_rate": 1.3968443730098434e-06, + "loss": 0.4985, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -103.09126281738281, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -118.4273681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0091263055801392, + "rewards_train/margins": 1.6336106061935425, + "rewards_train/rejected": -2.6427369117736816, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -174.28662109375, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -186.30126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.878662109375, + "rewards_train/margins": 1.451465129852295, + "rewards_train/rejected": -6.330127239227295, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -54.02897644042969, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -66.26675415039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8778977394104004, + "rewards_train/margins": 0.6237776279449463, + "rewards_train/rejected": -3.5016753673553467, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -44.437278747558594, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -147.57717895507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6062278747558594, + "rewards_train/margins": 2.4514899253845215, + "rewards_train/rejected": -4.057717800140381, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.972339630126953, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -2.203125, + "logps_train/rejected": -17.599834442138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24410896003246307, + "rewards_train/margins": 1.2955619841814041, + "rewards_train/rejected": -1.5396709442138672, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -121.31167602539062, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -156.08660888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2811676263809204, + "rewards_train/margins": 4.5774935483932495, + "rewards_train/rejected": -5.85866117477417, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -131.44818115234375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -158.64688110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.694818139076233, + "rewards_train/margins": 2.3698700666427612, + "rewards_train/rejected": -4.064688205718994, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -142.70458984375, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -209.79690551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.270458936691284, + "rewards_train/margins": 6.009231805801392, + "rewards_train/rejected": -8.279690742492676, + "step": 1004 + }, + { + "epoch": 0.28, + "logps_train/chosen": -11.328474998474121, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -33.96279525756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2609024941921234, + "rewards_train/margins": 2.688432067632675, + "rewards_train/rejected": -2.4275295734405518, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -21.48955726623535, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -81.30082702636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3364557027816772, + "rewards_train/margins": 0.4186270236968994, + "rewards_train/rejected": -1.7550827264785767, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.81829833984375, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -26.679058074951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.813079833984375, + "rewards_train/margins": 0.648576021194458, + "rewards_train/rejected": -1.461655855178833, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -96.65021514892578, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -36.937156677246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.065021514892578, + "rewards_train/margins": 0.10994410514831543, + "rewards_train/rejected": -2.1749656200408936, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -1.6939820051193237, + "logps_train/ref_chosen": -1.25, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -4.458170413970947, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04439819976687431, + "rewards_train/margins": -0.30795617029070854, + "rewards_train/rejected": 0.26355797052383423, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -10.076417922973633, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -28.052608489990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6654543280601501, + "rewards_train/margins": 0.26480650901794434, + "rewards_train/rejected": -0.9302608370780945, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -94.37053680419922, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -77.31187438964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.837053656578064, + "rewards_train/margins": -0.8058662414550781, + "rewards_train/rejected": -1.0311874151229858, + "step": 1005 + }, + { + "epoch": 0.28, + "logps_train/chosen": -159.249267578125, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -244.6492156982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7249268293380737, + "rewards_train/margins": 6.839994549751282, + "rewards_train/rejected": -8.564921379089355, + "step": 1005 + }, + { + "epoch": 0.28, + "learning_rate": 1.3944146724605456e-06, + "loss": 0.3423, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -42.914939880371094, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -16.575279235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10850601643323898, + "rewards_train/margins": 1.3691589161753654, + "rewards_train/rejected": -1.2606528997421265, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -33.18663787841797, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -9.865150451660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.462413787841797, + "rewards_train/margins": -1.7727737426757812, + "rewards_train/rejected": -0.6896400451660156, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -17.734912872314453, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -43.93220520019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23599128425121307, + "rewards_train/margins": 2.08222933113575, + "rewards_train/rejected": -2.318220615386963, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -25.607717514038086, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -31.043581008911133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8982717394828796, + "rewards_train/margins": 0.6185863614082336, + "rewards_train/rejected": -1.5168581008911133, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -218.10841369628906, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -161.69436645507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3108413219451904, + "rewards_train/margins": 0.8085954189300537, + "rewards_train/rejected": -4.119436740875244, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -8.048328399658203, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -13.24935531616211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.48764535784721375, + "rewards_train/margins": -0.2002098262310028, + "rewards_train/rejected": -0.28743553161621094, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -19.57790184020996, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -40.4207763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30779018998146057, + "rewards_train/margins": 2.659287542104721, + "rewards_train/rejected": -2.9670777320861816, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -14.9580078125, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -26.24691390991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.895800769329071, + "rewards_train/margins": 1.025765597820282, + "rewards_train/rejected": -1.921566367149353, + "step": 1006 + }, + { + "epoch": 0.28, + "logps_train/chosen": -30.42533302307129, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -25.341903686523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30503329634666443, + "rewards_train/margins": 0.8479070961475372, + "rewards_train/rejected": -1.1529403924942017, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -178.67520141601562, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -180.64208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7675201296806335, + "rewards_train/margins": 0.09668886661529541, + "rewards_train/rejected": -0.864208996295929, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -103.78455352783203, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -165.29385375976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7784554958343506, + "rewards_train/margins": 3.5509297847747803, + "rewards_train/rejected": -6.329385280609131, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -83.83842468261719, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -86.35791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0338424444198608, + "rewards_train/margins": 0.351948618888855, + "rewards_train/rejected": -1.3857910633087158, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -52.53036880493164, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -41.30287551879883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.853036880493164, + "rewards_train/margins": 0.2147507667541504, + "rewards_train/rejected": -3.0677876472473145, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -24.666658401489258, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -32.4255256652832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2916659116744995, + "rewards_train/margins": 0.20088672637939453, + "rewards_train/rejected": -1.492552638053894, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -20.747438430786133, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -31.508216857910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3872438669204712, + "rewards_train/margins": -0.37392210960388184, + "rewards_train/rejected": -1.0133217573165894, + "step": 1007 + }, + { + "epoch": 0.28, + "logps_train/chosen": -7.702045440673828, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -1.421875, + "logps_train/rejected": -5.974015712738037, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38270455598831177, + "rewards_train/margins": 0.0725095272064209, + "rewards_train/rejected": -0.45521408319473267, + "step": 1007 + }, + { + "epoch": 0.28, + "learning_rate": 1.3919822114267152e-06, + "loss": 0.5346, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -53.150760650634766, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -36.29906463623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39007607102394104, + "rewards_train/margins": 0.6023304164409637, + "rewards_train/rejected": -0.9924064874649048, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -17.34385108947754, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -18.489646911621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.615635097026825, + "rewards_train/margins": 0.3833296298980713, + "rewards_train/rejected": -0.9989647269248962, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -58.77341842651367, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -41.736331939697266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2523419857025146, + "rewards_train/margins": -0.5037087202072144, + "rewards_train/rejected": -1.7486332654953003, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -137.48707580566406, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -228.68182373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0487077236175537, + "rewards_train/margins": 5.119474649429321, + "rewards_train/rejected": -7.168182373046875, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -159.0610809326172, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -187.6182403564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.306107997894287, + "rewards_train/margins": 0.8557162284851074, + "rewards_train/rejected": -5.1618242263793945, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -127.9236068725586, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -327.21759033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8923606872558594, + "rewards_train/margins": 12.229398727416992, + "rewards_train/rejected": -14.121759414672852, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -144.70492553710938, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -154.69573974609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.570492744445801, + "rewards_train/margins": 0.34908151626586914, + "rewards_train/rejected": -4.91957426071167, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -12.691950798034668, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -18.906972885131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0504450798034668, + "rewards_train/margins": 0.17775221168994904, + "rewards_train/rejected": -0.22819729149341583, + "step": 1008 + }, + { + "epoch": 0.28, + "logps_train/chosen": -170.00216674804688, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -183.7583465576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9002166986465454, + "rewards_train/margins": 2.0756179094314575, + "rewards_train/rejected": -2.975834608078003, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -9.94752311706543, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -12.684477806091309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.688502311706543, + "rewards_train/margins": 0.1940079927444458, + "rewards_train/rejected": -0.8825103044509888, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -115.66759490966797, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -110.95084381103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4167594909667969, + "rewards_train/margins": 1.3783248662948608, + "rewards_train/rejected": -1.7950843572616577, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -13.360297203063965, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -8.479202270507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.329779714345932, + "rewards_train/margins": -0.0443594753742218, + "rewards_train/rejected": -0.2854202389717102, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -120.09120178222656, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -120.54096984863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9591201543807983, + "rewards_train/margins": 0.04497683048248291, + "rewards_train/rejected": -2.0040969848632812, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -15.230610847473145, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -9.64422607421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9449360966682434, + "rewards_train/margins": -0.30863845348358154, + "rewards_train/rejected": -0.6362976431846619, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -158.06057739257812, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -43.288673400878906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5060577392578125, + "rewards_train/margins": -1.0146903991699219, + "rewards_train/rejected": -1.4913673400878906, + "step": 1009 + }, + { + "epoch": 0.28, + "logps_train/chosen": -107.32537078857422, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -140.5068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.332537055015564, + "rewards_train/margins": 1.5181466341018677, + "rewards_train/rejected": -2.8506836891174316, + "step": 1009 + }, + { + "epoch": 0.28, + "learning_rate": 1.3895470069330003e-06, + "loss": 0.5097, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -119.74388122558594, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -249.8929443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.824388265609741, + "rewards_train/margins": 9.514906167984009, + "rewards_train/rejected": -13.33929443359375, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -38.96846008300781, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -112.66759490966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3718460202217102, + "rewards_train/margins": 3.6949135661125183, + "rewards_train/rejected": -4.0667595863342285, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -68.00804901123047, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -88.0639419555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4758050441741943, + "rewards_train/margins": 2.2305891513824463, + "rewards_train/rejected": -4.706394195556641, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -41.33027648925781, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -59.20201873779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7830276489257812, + "rewards_train/margins": 0.21217423677444458, + "rewards_train/rejected": -0.9952018857002258, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -24.93126678466797, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -28.11539077758789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49312669038772583, + "rewards_train/margins": 0.9246624112129211, + "rewards_train/rejected": -1.417789101600647, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -4.113714218139648, + "logps_train/ref_chosen": -0.7265625, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -20.437692642211914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33871516585350037, + "rewards_train/margins": 1.0425541698932648, + "rewards_train/rejected": -1.3812693357467651, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -27.959625244140625, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -11.420951843261719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2522125244140625, + "rewards_train/margins": -1.353867344558239, + "rewards_train/rejected": 0.10165482014417648, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -35.978233337402344, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -48.19707489013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002176666399464011, + "rewards_train/margins": 2.45938410772942, + "rewards_train/rejected": -2.457207441329956, + "step": 1010 + }, + { + "epoch": 0.28, + "logps_train/chosen": -9.287910461425781, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -13.125958442687988, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.47254106402397156, + "rewards_train/margins": -0.0536952018737793, + "rewards_train/rejected": -0.41884586215019226, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -39.69110870361328, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -12.096488952636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.669110894203186, + "rewards_train/margins": 0.2999129891395569, + "rewards_train/rejected": -0.9690238833427429, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -4.293186664581299, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -21.13382339477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20818133652210236, + "rewards_train/margins": 1.9356262236833572, + "rewards_train/rejected": -1.7274448871612549, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -113.77943420410156, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -159.2490997314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.127943515777588, + "rewards_train/margins": 4.596966743469238, + "rewards_train/rejected": -6.724910259246826, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -7.341745376586914, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -26.301259994506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44042453169822693, + "rewards_train/margins": 1.1334514915943146, + "rewards_train/rejected": -1.5738760232925415, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -139.64036560058594, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -142.24661254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2140365839004517, + "rewards_train/margins": 4.210624575614929, + "rewards_train/rejected": -5.424661159515381, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -220.4944305419922, + "logps_train/ref_chosen": -192.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -146.27403259277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.849443197250366, + "rewards_train/margins": 0.6779601573944092, + "rewards_train/rejected": -3.5274033546447754, + "step": 1011 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.888511657714844, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -10.887495994567871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25760117173194885, + "rewards_train/margins": 0.2123984396457672, + "rewards_train/rejected": -0.46999961137771606, + "step": 1011 + }, + { + "epoch": 0.28, + "learning_rate": 1.3871090760232505e-06, + "loss": 0.3585, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -105.54586791992188, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -115.96846771240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8045867681503296, + "rewards_train/margins": 1.4922600984573364, + "rewards_train/rejected": -3.296846866607666, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -143.51211547851562, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -226.59654235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1012115478515625, + "rewards_train/margins": 8.508442878723145, + "rewards_train/rejected": -11.609654426574707, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -15.045675277709961, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -17.43701934814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6358175277709961, + "rewards_train/margins": 0.7266343832015991, + "rewards_train/rejected": -1.3624519109725952, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -95.54339599609375, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -199.56631469726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1543396711349487, + "rewards_train/margins": 3.4022918939590454, + "rewards_train/rejected": -4.556631565093994, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -207.17007446289062, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -265.50897216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.017007827758789, + "rewards_train/margins": 4.633889198303223, + "rewards_train/rejected": -14.650897026062012, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -34.247161865234375, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -29.330623626708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6997162103652954, + "rewards_train/margins": 0.17709612846374512, + "rewards_train/rejected": -1.8768123388290405, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -114.28346252441406, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -93.62879943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4783462285995483, + "rewards_train/margins": 2.2595337629318237, + "rewards_train/rejected": -3.737879991531372, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -20.538990020751953, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -15.691728591918945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5288990139961243, + "rewards_train/margins": -0.24097615480422974, + "rewards_train/rejected": -0.28792285919189453, + "step": 1012 + }, + { + "epoch": 0.28, + "logps_train/chosen": -84.82369232177734, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -195.750244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.732369303703308, + "rewards_train/margins": 6.742654919624329, + "rewards_train/rejected": -8.475024223327637, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -109.45654296875, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -141.48806762695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.295654296875, + "rewards_train/margins": 2.053152561187744, + "rewards_train/rejected": -3.348806858062744, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -112.80098724365234, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -112.51821899414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8800987601280212, + "rewards_train/margins": -0.02827686071395874, + "rewards_train/rejected": -0.8518218994140625, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -117.16558837890625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -134.42169189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.216558814048767, + "rewards_train/margins": 4.275610566139221, + "rewards_train/rejected": -5.492169380187988, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -107.44403839111328, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -114.69670104980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.944403886795044, + "rewards_train/margins": -0.8247337341308594, + "rewards_train/rejected": -1.1196701526641846, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -133.6627655029297, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -144.23187255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.916276693344116, + "rewards_train/margins": 0.8069107532501221, + "rewards_train/rejected": -4.723187446594238, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -106.9950942993164, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -165.60079956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.049509432166814804, + "rewards_train/margins": 5.460570428520441, + "rewards_train/rejected": -5.510079860687256, + "step": 1013 + }, + { + "epoch": 0.28, + "logps_train/chosen": -22.169809341430664, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -44.652706146240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1544809341430664, + "rewards_train/margins": 1.1732897758483887, + "rewards_train/rejected": -2.327770709991455, + "step": 1013 + }, + { + "epoch": 0.28, + "learning_rate": 1.3846684357603976e-06, + "loss": 0.3027, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -132.00888061523438, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -213.19114685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9008880853652954, + "rewards_train/margins": 4.118226885795593, + "rewards_train/rejected": -6.019114971160889, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -2.289491891860962, + "logps_train/ref_chosen": -0.498046875, + "logps_train/ref_rejected": -1.515625, + "logps_train/rejected": -4.05049467086792, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1791445016860962, + "rewards_train/margins": 0.07434245944023132, + "rewards_train/rejected": -0.2534869611263275, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -119.90794372558594, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -117.89372253417969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3907943964004517, + "rewards_train/margins": -0.3514220714569092, + "rewards_train/rejected": -1.0393723249435425, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -5.254034996032715, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -15.915568351745605, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.006653499789535999, + "rewards_train/margins": 1.1411533830687404, + "rewards_train/rejected": -1.1478068828582764, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -41.92801284790039, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -38.92892074584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5928012728691101, + "rewards_train/margins": 1.6875908970832825, + "rewards_train/rejected": -2.2803921699523926, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -5.134908199310303, + "logps_train/ref_chosen": -1.1875, + "logps_train/ref_rejected": -0.87109375, + "logps_train/rejected": -0.866610050201416, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3947408199310303, + "rewards_train/margins": -0.3951891899050679, + "rewards_train/rejected": 0.00044836997403763235, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -179.6895751953125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -180.04763793945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.168957710266113, + "rewards_train/margins": 1.2858061790466309, + "rewards_train/rejected": -6.454763889312744, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -12.775275230407715, + "logps_train/ref_chosen": -1.4765625, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -29.186477661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1298712491989136, + "rewards_train/margins": 0.4325265884399414, + "rewards_train/rejected": -1.562397837638855, + "step": 1014 + }, + { + "epoch": 0.28, + "logps_train/chosen": -29.35409164428711, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -78.27516174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.585409164428711, + "rewards_train/margins": 0.5171070098876953, + "rewards_train/rejected": -2.1025161743164062, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -18.09488296508789, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -63.342567443847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.596988320350647, + "rewards_train/margins": 1.5372685194015503, + "rewards_train/rejected": -2.1342568397521973, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -41.19344711303711, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -70.66175842285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9818447232246399, + "rewards_train/margins": -0.7156688868999481, + "rewards_train/rejected": -0.2661758363246918, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -229.10226440429688, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -213.95602416992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.3102264404296875, + "rewards_train/margins": -1.1146240234375, + "rewards_train/rejected": -6.1956024169921875, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -32.15474319458008, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -18.38218879699707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4654743671417236, + "rewards_train/margins": -0.6210054755210876, + "rewards_train/rejected": -0.844468891620636, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -20.73065757751465, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -2.5625, + "logps_train/rejected": -16.751873016357422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7980657815933228, + "rewards_train/margins": -0.37912845611572266, + "rewards_train/rejected": -1.4189373254776, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -28.243152618408203, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -83.08865356445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0368152856826782, + "rewards_train/margins": 4.472050070762634, + "rewards_train/rejected": -5.5088653564453125, + "step": 1015 + }, + { + "epoch": 0.28, + "logps_train/chosen": -19.223363876342773, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -21.946168899536133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9910864233970642, + "rewards_train/margins": 0.1472805142402649, + "rewards_train/rejected": -1.138366937637329, + "step": 1015 + }, + { + "epoch": 0.28, + "learning_rate": 1.3822251032263352e-06, + "loss": 0.588, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -100.41368103027344, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -178.7717742919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0413681268692017, + "rewards_train/margins": 7.035809874534607, + "rewards_train/rejected": -8.077178001403809, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -17.954322814941406, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -26.87141227722168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4266822934150696, + "rewards_train/margins": 0.16045892238616943, + "rewards_train/rejected": -0.587141215801239, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -21.110536575317383, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -30.662490844726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4579286575317383, + "rewards_train/margins": 0.7083203792572021, + "rewards_train/rejected": -2.1662490367889404, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -242.25685119628906, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -237.9080352783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.825685501098633, + "rewards_train/margins": 2.465118408203125, + "rewards_train/rejected": -11.290803909301758, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -106.33038330078125, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -141.34815979003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.633038341999054, + "rewards_train/margins": 3.351777732372284, + "rewards_train/rejected": -3.984816074371338, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -9.846242904663086, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -24.855541229248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5096243023872375, + "rewards_train/margins": 0.932179868221283, + "rewards_train/rejected": -1.4418041706085205, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -45.862178802490234, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -61.12142562866211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18878212571144104, + "rewards_train/margins": 1.2759247124195099, + "rewards_train/rejected": -1.0871425867080688, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -5.180617809295654, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -9.583226203918457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08818822354078293, + "rewards_train/margins": 0.6465108320116997, + "rewards_train/rejected": -0.5583226084709167, + "step": 1016 + }, + { + "epoch": 0.28, + "logps_train/chosen": -146.51622009277344, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -175.76974487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9016220569610596, + "rewards_train/margins": 1.2253525257110596, + "rewards_train/rejected": -5.126974582672119, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -169.6524658203125, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -122.49807739257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.06524658203125, + "rewards_train/margins": 1.0345611572265625, + "rewards_train/rejected": -2.0998077392578125, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -125.58079528808594, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -232.87559509277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15807953476905823, + "rewards_train/margins": 6.729479879140854, + "rewards_train/rejected": -6.887559413909912, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -6.873692989349365, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -7.100076198577881, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.348306804895401, + "rewards_train/margins": 0.04763832688331604, + "rewards_train/rejected": -0.39594513177871704, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -142.09646606445312, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -194.72274780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0596466064453125, + "rewards_train/margins": 2.5126280784606934, + "rewards_train/rejected": -4.572274684906006, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -101.91580200195312, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -145.4882049560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.441580206155777, + "rewards_train/margins": 2.3072403371334076, + "rewards_train/rejected": -2.7488205432891846, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -184.01617431640625, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -232.3513641357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30161744356155396, + "rewards_train/margins": 6.23351925611496, + "rewards_train/rejected": -6.535136699676514, + "step": 1017 + }, + { + "epoch": 0.28, + "logps_train/chosen": -266.18707275390625, + "logps_train/ref_chosen": -208.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -204.422607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.818707466125488, + "rewards_train/margins": 1.6235532760620117, + "rewards_train/rejected": -7.4422607421875, + "step": 1017 + }, + { + "epoch": 0.28, + "learning_rate": 1.379779095521801e-06, + "loss": 0.2325, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -106.17576599121094, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -182.58840942382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4175766706466675, + "rewards_train/margins": 6.591264843940735, + "rewards_train/rejected": -8.008841514587402, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -95.82975769042969, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -135.28829956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1829757690429688, + "rewards_train/margins": 0.6458542346954346, + "rewards_train/rejected": -2.8288300037384033, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -11.684738159179688, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -1.8125, + "logps_train/rejected": -3.263284921646118, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16222381591796875, + "rewards_train/margins": -0.017145320773124695, + "rewards_train/rejected": -0.14507849514484406, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -99.90470886230469, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -107.79698944091797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7404708862304688, + "rewards_train/margins": -0.8107719421386719, + "rewards_train/rejected": -0.9296989440917969, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -20.074417114257812, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -3.328125, + "logps_train/rejected": -23.397483825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.554316759109497, + "rewards_train/margins": 0.4526190757751465, + "rewards_train/rejected": -2.0069358348846436, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -232.21707153320312, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -210.71571350097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.321707248687744, + "rewards_train/margins": 1.1498641967773438, + "rewards_train/rejected": -4.471571445465088, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -115.66268157958984, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -110.3031997680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.966268301010132, + "rewards_train/margins": 0.2140517234802246, + "rewards_train/rejected": -3.1803200244903564, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -128.3510284423828, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -162.424560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8351028561592102, + "rewards_train/margins": 0.9073532223701477, + "rewards_train/rejected": -1.742456078529358, + "step": 1018 + }, + { + "epoch": 0.28, + "logps_train/chosen": -134.20359802246094, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -198.55950927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6203598380088806, + "rewards_train/margins": 4.135591089725494, + "rewards_train/rejected": -4.755950927734375, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -71.68377685546875, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -107.12753295898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.018377661705017, + "rewards_train/margins": 1.7443755865097046, + "rewards_train/rejected": -2.7627532482147217, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -49.12925720214844, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -86.7631607055664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0879257917404175, + "rewards_train/margins": -0.7616097033023834, + "rewards_train/rejected": -0.32631608843803406, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -14.87009048461914, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -21.567867279052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.727634072303772, + "rewards_train/margins": 0.08540266752243042, + "rewards_train/rejected": -0.8130367398262024, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -46.55534362792969, + "logps_train/ref_chosen": -31.625, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -19.013442993164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4930343627929688, + "rewards_train/margins": -0.5291900634765625, + "rewards_train/rejected": -0.9638442993164062, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -257.04864501953125, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -259.31396484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.604865074157715, + "rewards_train/margins": -0.773468017578125, + "rewards_train/rejected": -10.83139705657959, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -155.77658081054688, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -219.234130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.077658176422119, + "rewards_train/margins": 4.145755290985107, + "rewards_train/rejected": -8.223413467407227, + "step": 1019 + }, + { + "epoch": 0.28, + "logps_train/chosen": -16.031269073486328, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -7.641324043273926, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6406269073486328, + "rewards_train/margins": -0.09993201494216919, + "rewards_train/rejected": -0.5406948924064636, + "step": 1019 + }, + { + "epoch": 0.29, + "learning_rate": 1.3773304297662557e-06, + "loss": 0.555, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -148.04501342773438, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -234.062255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.604501485824585, + "rewards_train/margins": 6.101723909378052, + "rewards_train/rejected": -9.706225395202637, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -93.18540954589844, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -103.51262664794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8685410022735596, + "rewards_train/margins": 0.9827218055725098, + "rewards_train/rejected": -2.8512628078460693, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -24.405994415283203, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -34.57780838012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49059945344924927, + "rewards_train/margins": 0.34218138456344604, + "rewards_train/rejected": -0.8327808380126953, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -105.55181884765625, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -112.65364074707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1051819324493408, + "rewards_train/margins": 2.7601821422576904, + "rewards_train/rejected": -3.8653640747070312, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -177.26461791992188, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -169.429931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.526462078094482, + "rewards_train/margins": 2.1165313720703125, + "rewards_train/rejected": -6.642993450164795, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -5.032164573669434, + "logps_train/ref_chosen": -1.1875, + "logps_train/ref_rejected": -0.87109375, + "logps_train/rejected": -0.8721073269844055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3844664692878723, + "rewards_train/margins": -0.38436511158943176, + "rewards_train/rejected": -0.00010135769844055176, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -6.575648307800293, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -16.875703811645508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24193982779979706, + "rewards_train/margins": 0.7237555533647537, + "rewards_train/rejected": -0.9656953811645508, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -251.09762573242188, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -220.09445190429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.409762859344482, + "rewards_train/margins": -0.00031757354736328125, + "rewards_train/rejected": -7.409445285797119, + "step": 1020 + }, + { + "epoch": 0.29, + "logps_train/chosen": -130.4488067626953, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -163.20928955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.944880723953247, + "rewards_train/margins": 3.6260483264923096, + "rewards_train/rejected": -7.570929050445557, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.617209434509277, + "logps_train/ref_chosen": -1.578125, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -15.987672805786133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8039084672927856, + "rewards_train/margins": 0.5682963132858276, + "rewards_train/rejected": -1.3722047805786133, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -168.255126953125, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -226.91262817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.725512683391571, + "rewards_train/margins": 8.065749943256378, + "rewards_train/rejected": -8.79126262664795, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -49.84385681152344, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -31.484512329101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0593857765197754, + "rewards_train/margins": 0.34844040870666504, + "rewards_train/rejected": -2.4078261852264404, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.8113551139831543, + "logps_train/ref_chosen": -0.92578125, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -12.32368278503418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18855738639831543, + "rewards_train/margins": -0.2686891108751297, + "rewards_train/rejected": 0.08013172447681427, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -91.85311889648438, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -91.64962005615234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4853118658065796, + "rewards_train/margins": -0.020349860191345215, + "rewards_train/rejected": -1.4649620056152344, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -15.283449172973633, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -25.80180549621582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7158449292182922, + "rewards_train/margins": 0.9018356204032898, + "rewards_train/rejected": -1.617680549621582, + "step": 1021 + }, + { + "epoch": 0.29, + "logps_train/chosen": -82.07272338867188, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -93.23435974121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05727234110236168, + "rewards_train/margins": 1.8661637045443058, + "rewards_train/rejected": -1.9234360456466675, + "step": 1021 + }, + { + "epoch": 0.29, + "learning_rate": 1.374879123097763e-06, + "loss": 0.3785, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -151.50672912597656, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -198.62496948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.350672960281372, + "rewards_train/margins": 7.811824560165405, + "rewards_train/rejected": -10.162497520446777, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -17.620323181152344, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -45.64194107055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0629676803946495, + "rewards_train/margins": 4.027161739766598, + "rewards_train/rejected": -3.9641940593719482, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -79.98228454589844, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -104.82968139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6232285499572754, + "rewards_train/margins": 2.8347396850585938, + "rewards_train/rejected": -5.457968235015869, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -163.45545959472656, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -125.46408081054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6455459594726562, + "rewards_train/margins": 0.25086212158203125, + "rewards_train/rejected": -3.8964080810546875, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -35.58827209472656, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -29.8572940826416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4525773525238037, + "rewards_train/margins": 0.06440210342407227, + "rewards_train/rejected": -2.516979455947876, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -158.98536682128906, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -261.6203308105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4985367059707642, + "rewards_train/margins": 4.163496375083923, + "rewards_train/rejected": -5.6620330810546875, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -23.06485366821289, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -21.517210006713867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.762735366821289, + "rewards_train/margins": -0.14538931846618652, + "rewards_train/rejected": -1.6173460483551025, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -97.81916809082031, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -35.423439025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3680832087993622, + "rewards_train/margins": 2.9979271590709686, + "rewards_train/rejected": -2.6298439502716064, + "step": 1022 + }, + { + "epoch": 0.29, + "logps_train/chosen": -28.342857360839844, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -40.435829162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35928574204444885, + "rewards_train/margins": 1.4467972218990326, + "rewards_train/rejected": -1.8060829639434814, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -82.76338195800781, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -203.59921264648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1263381987810135, + "rewards_train/margins": 6.033582970499992, + "rewards_train/rejected": -6.159921169281006, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -29.983871459960938, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -12.866090774536133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6608871817588806, + "rewards_train/margins": 0.08197188377380371, + "rewards_train/rejected": -0.7428590655326843, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.32747459411621, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -23.242685317993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9077474474906921, + "rewards_train/margins": 0.813396155834198, + "rewards_train/rejected": -1.7211436033248901, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -110.88597106933594, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -237.2299346923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.838597059249878, + "rewards_train/margins": 5.98439621925354, + "rewards_train/rejected": -9.822993278503418, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -37.231231689453125, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -83.6659927368164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.53562331199646, + "rewards_train/margins": -1.0690239667892456, + "rewards_train/rejected": -1.4665993452072144, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -89.29174041748047, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -126.26303100585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9791740775108337, + "rewards_train/margins": 2.3971291184425354, + "rewards_train/rejected": -3.376303195953369, + "step": 1023 + }, + { + "epoch": 0.29, + "logps_train/chosen": -26.27105712890625, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -118.42720031738281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.258355736732483, + "rewards_train/margins": -0.06563568115234375, + "rewards_train/rejected": -1.1927200555801392, + "step": 1023 + }, + { + "epoch": 0.29, + "learning_rate": 1.3724251926728708e-06, + "loss": 0.3472, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -84.25489807128906, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -84.60039520263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.57548987865448, + "rewards_train/margins": 0.034549713134765625, + "rewards_train/rejected": -1.6100395917892456, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -110.13501739501953, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -157.63038635253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4364982545375824, + "rewards_train/margins": 5.249537080526352, + "rewards_train/rejected": -4.8130388259887695, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.434494018554688, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -24.84658432006836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7715744376182556, + "rewards_train/margins": -0.024415969848632812, + "rewards_train/rejected": -0.7471584677696228, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -111.29997253417969, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -157.63755798339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.929997205734253, + "rewards_train/margins": 2.883758783340454, + "rewards_train/rejected": -6.813755989074707, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.824501037597656, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -28.004901885986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1637001037597656, + "rewards_train/margins": 0.443040132522583, + "rewards_train/rejected": -1.6067402362823486, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -13.366442680358887, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -46.12215042114258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11789426952600479, + "rewards_train/margins": 1.219320796430111, + "rewards_train/rejected": -1.3372150659561157, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -47.72104263305664, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -53.84450912475586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2221043109893799, + "rewards_train/margins": 1.3123466968536377, + "rewards_train/rejected": -2.5344510078430176, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -302.0052185058594, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -213.0, + "logps_train/rejected": -340.47174072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.400522232055664, + "rewards_train/margins": 1.3466520309448242, + "rewards_train/rejected": -12.747174263000488, + "step": 1024 + }, + { + "epoch": 0.29, + "logps_train/chosen": -12.97474479675293, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -28.43160629272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.258775532245636, + "rewards_train/margins": 1.6831862330436707, + "rewards_train/rejected": -1.4244107007980347, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -193.78990173339844, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -222.85357666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.728990077972412, + "rewards_train/margins": 3.5063681602478027, + "rewards_train/rejected": -10.235358238220215, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -64.79853820800781, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -117.75942993164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7298538088798523, + "rewards_train/margins": 1.1960892081260681, + "rewards_train/rejected": -1.9259430170059204, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -128.5131072998047, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -164.95506286621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2513108253479004, + "rewards_train/margins": 3.144195556640625, + "rewards_train/rejected": -6.395506381988525, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.648179292678833, + "logps_train/ref_chosen": -1.921875, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -14.619325637817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07263042777776718, + "rewards_train/margins": 0.35180214792490005, + "rewards_train/rejected": -0.42443257570266724, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -239.21730041503906, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -172.65444946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.121729850769043, + "rewards_train/margins": 0.3437156677246094, + "rewards_train/rejected": -8.465445518493652, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -176.87913513183594, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -265.6026916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7879135608673096, + "rewards_train/margins": 6.5723559856414795, + "rewards_train/rejected": -10.360269546508789, + "step": 1025 + }, + { + "epoch": 0.29, + "logps_train/chosen": -160.94332885742188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -219.80935668945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0943329334259033, + "rewards_train/margins": 4.3866026401519775, + "rewards_train/rejected": -7.480935573577881, + "step": 1025 + }, + { + "epoch": 0.29, + "learning_rate": 1.3699686556664905e-06, + "loss": 0.2657, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -0.29646313190460205, + "logps_train/ref_chosen": -0.7890625, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -33.31401443481445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.049259938299655914, + "rewards_train/margins": 1.080661453306675, + "rewards_train/rejected": -1.031401515007019, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -113.37307739257812, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -199.5082244873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7873077392578125, + "rewards_train/margins": 7.8635149002075195, + "rewards_train/rejected": -8.650822639465332, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -19.791969299316406, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -23.099794387817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25830307602882385, + "rewards_train/margins": 1.91828253865242, + "rewards_train/rejected": -1.6599794626235962, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -31.81949234008789, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -39.32612991333008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2194492816925049, + "rewards_train/margins": 0.41316378116607666, + "rewards_train/rejected": -1.6326130628585815, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.514945983886719, + "logps_train/ref_chosen": -0.361328125, + "logps_train/ref_rejected": -0.361328125, + "logps_train/rejected": -10.263794898986816, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9153618216514587, + "rewards_train/margins": 0.07488489151000977, + "rewards_train/rejected": -0.9902467131614685, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.704630851745605, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -28.094802856445312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.429838091135025, + "rewards_train/margins": -0.30785780400037766, + "rewards_train/rejected": -0.12198028713464737, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -33.54556655883789, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -35.62034606933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5576817989349365, + "rewards_train/margins": -0.07689714431762695, + "rewards_train/rejected": -2.4807846546173096, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -23.17463493347168, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -13.926900863647461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.17996346950531, + "rewards_train/margins": -0.10289835929870605, + "rewards_train/rejected": -1.077065110206604, + "step": 1026 + }, + { + "epoch": 0.29, + "logps_train/chosen": -15.869227409362793, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -17.658634185791016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6244227290153503, + "rewards_train/margins": -0.13355931639671326, + "rewards_train/rejected": -0.4908634126186371, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -168.41473388671875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -216.9855194091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.691473484039307, + "rewards_train/margins": 2.607079029083252, + "rewards_train/rejected": -8.298552513122559, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -120.74296569824219, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -152.847900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2242965698242188, + "rewards_train/margins": 1.9104933738708496, + "rewards_train/rejected": -5.134789943695068, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -192.96458435058594, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -174.77120971679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.246458530426025, + "rewards_train/margins": -0.5693373680114746, + "rewards_train/rejected": -6.677121162414551, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -4.986324310302734, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -4.8156962394714355, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29550743103027344, + "rewards_train/margins": -0.01706281304359436, + "rewards_train/rejected": -0.2784446179866791, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.74602222442627, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -23.505781173706055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.549602210521698, + "rewards_train/margins": 0.9697259068489075, + "rewards_train/rejected": -1.5193281173706055, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -181.0980987548828, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -221.45944213867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4098098278045654, + "rewards_train/margins": 6.736134767532349, + "rewards_train/rejected": -10.145944595336914, + "step": 1027 + }, + { + "epoch": 0.29, + "logps_train/chosen": -178.3719940185547, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -271.654541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3371994495391846, + "rewards_train/margins": 9.728255033493042, + "rewards_train/rejected": -12.065454483032227, + "step": 1027 + }, + { + "epoch": 0.29, + "learning_rate": 1.3675095292717762e-06, + "loss": 0.4341, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -56.00056838989258, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -40.106048583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7000569105148315, + "rewards_train/margins": 0.9605480432510376, + "rewards_train/rejected": -2.660604953765869, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -30.863086700439453, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -30.33289337158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39880868792533875, + "rewards_train/margins": 2.0188556015491486, + "rewards_train/rejected": -2.4176642894744873, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.95727825164795, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -27.815710067749023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8863528370857239, + "rewards_train/margins": 1.5717806220054626, + "rewards_train/rejected": -2.4581334590911865, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -33.112178802490234, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -69.61353302001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3737179040908813, + "rewards_train/margins": 3.4626353979110718, + "rewards_train/rejected": -4.836353302001953, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -0.34030064940452576, + "logps_train/ref_chosen": -0.71484375, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -8.783110618591309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.037454310804605484, + "rewards_train/margins": 0.2376403696835041, + "rewards_train/rejected": -0.20018605887889862, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -132.6629638671875, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -201.487548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.01629638671875, + "rewards_train/margins": 5.58245849609375, + "rewards_train/rejected": -7.5987548828125, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -38.93757247924805, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -48.908363342285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.062507390975952, + "rewards_train/margins": -0.7591710090637207, + "rewards_train/rejected": -2.3033363819122314, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -129.78761291503906, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -192.13381958007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.378761291503906, + "rewards_train/margins": 1.534620761871338, + "rewards_train/rejected": -5.913382053375244, + "step": 1028 + }, + { + "epoch": 0.29, + "logps_train/chosen": -3.917614221572876, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -41.4319953918457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22926142811775208, + "rewards_train/margins": 1.263938158750534, + "rewards_train/rejected": -1.4931995868682861, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -1.6667532920837402, + "logps_train/ref_chosen": -0.62109375, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -10.813684463500977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10456595569849014, + "rewards_train/margins": 0.37055250257253647, + "rewards_train/rejected": -0.4751184582710266, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -24.835861206054688, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -23.054990768432617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4273360967636108, + "rewards_train/margins": 0.5328505039215088, + "rewards_train/rejected": -1.9601866006851196, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -103.63980102539062, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -172.68356323242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1639801263809204, + "rewards_train/margins": 1.804376244544983, + "rewards_train/rejected": -2.9683563709259033, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -34.826446533203125, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -27.789718627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9076446890830994, + "rewards_train/margins": 0.2963271737098694, + "rewards_train/rejected": -1.2039718627929688, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.28640365600586, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -18.315555572509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1463596373796463, + "rewards_train/margins": 0.8341652303934097, + "rewards_train/rejected": -0.6878055930137634, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -132.18377685546875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -129.70774841308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.118377685546875, + "rewards_train/margins": -0.24760282039642334, + "rewards_train/rejected": -0.8707748651504517, + "step": 1029 + }, + { + "epoch": 0.29, + "logps_train/chosen": -13.382989883422852, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -3.921875, + "logps_train/rejected": -31.464706420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007048988249152899, + "rewards_train/margins": 2.7472342015244067, + "rewards_train/rejected": -2.7542831897735596, + "step": 1029 + }, + { + "epoch": 0.29, + "learning_rate": 1.3650478307000057e-06, + "loss": 0.3611, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -136.04273986816406, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -120.83518981933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9542739391326904, + "rewards_train/margins": 0.22924518585205078, + "rewards_train/rejected": -3.183519124984741, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -108.54878234863281, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -194.08544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8048782348632812, + "rewards_train/margins": 4.703666687011719, + "rewards_train/rejected": -7.508544921875, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -49.94023895263672, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -38.7718620300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3440239429473877, + "rewards_train/margins": 0.8581624031066895, + "rewards_train/rejected": -2.202186346054077, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -33.21440887451172, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -17.472965240478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021440887823700905, + "rewards_train/margins": 1.1227307077497244, + "rewards_train/rejected": -1.1441715955734253, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -57.22433090209961, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -56.53055191040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3224331140518188, + "rewards_train/margins": 0.3056221008300781, + "rewards_train/rejected": -1.628055214881897, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -12.949468612670898, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -62.28560256958008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5793218612670898, + "rewards_train/margins": 0.49923837184906006, + "rewards_train/rejected": -1.07856023311615, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -84.81080627441406, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -137.36813354492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.781080722808838, + "rewards_train/margins": 1.005732774734497, + "rewards_train/rejected": -3.786813497543335, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -32.104209899902344, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -47.3333740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0979210138320923, + "rewards_train/margins": 1.310416340827942, + "rewards_train/rejected": -2.408337354660034, + "step": 1030 + }, + { + "epoch": 0.29, + "logps_train/chosen": -118.84809875488281, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -218.28512573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5848098993301392, + "rewards_train/margins": 3.6437028646469116, + "rewards_train/rejected": -5.228512763977051, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -146.43934631347656, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -229.4019775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7439346313476562, + "rewards_train/margins": 8.09626293182373, + "rewards_train/rejected": -9.840197563171387, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -151.79164123535156, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -169.7225341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.579164028167725, + "rewards_train/margins": 0.2930893898010254, + "rewards_train/rejected": -4.87225341796875, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -74.03399658203125, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -99.63427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2533997297286987, + "rewards_train/margins": 1.960028052330017, + "rewards_train/rejected": -3.213427782058716, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -55.53781509399414, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -83.10285949707031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22878150641918182, + "rewards_train/margins": -0.21849555615335703, + "rewards_train/rejected": -0.010285950265824795, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -165.48947143554688, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -188.7310333251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0489470958709717, + "rewards_train/margins": 3.3241565227508545, + "rewards_train/rejected": -5.373103618621826, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -41.561370849609375, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -104.79423522949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.343637228012085, + "rewards_train/margins": 2.9107863903045654, + "rewards_train/rejected": -5.25442361831665, + "step": 1031 + }, + { + "epoch": 0.29, + "logps_train/chosen": -31.376379013061523, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -32.777549743652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9876379370689392, + "rewards_train/margins": -0.2723829746246338, + "rewards_train/rejected": -0.7152549624443054, + "step": 1031 + }, + { + "epoch": 0.29, + "learning_rate": 1.362583577180459e-06, + "loss": 0.3282, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -23.76485252380371, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -51.197120666503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.520235300064087, + "rewards_train/margins": 2.0057268142700195, + "rewards_train/rejected": -3.5259621143341064, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -99.68880462646484, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -159.94342041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.068880558013916, + "rewards_train/margins": 0.025461673736572266, + "rewards_train/rejected": -4.094342231750488, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -106.66901397705078, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -162.77713012695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46690139174461365, + "rewards_train/margins": 3.610811620950699, + "rewards_train/rejected": -4.0777130126953125, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -22.77341079711914, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -33.560203552246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.171091079711914, + "rewards_train/margins": 0.42242932319641113, + "rewards_train/rejected": -1.5935204029083252, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -39.50709915161133, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -97.65660858154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9132099151611328, + "rewards_train/margins": 3.8524508476257324, + "rewards_train/rejected": -5.765660762786865, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.2968015670776367, + "logps_train/ref_chosen": -0.498046875, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -14.243420600891113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1798754781484604, + "rewards_train/margins": 0.419466570019722, + "rewards_train/rejected": -0.5993420481681824, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -7.972865581512451, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -0.91015625, + "logps_train/rejected": -1.9429484605789185, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5550990700721741, + "rewards_train/margins": -0.45181984454393387, + "rewards_train/rejected": -0.1032792255282402, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -12.914323806762695, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -16.15380096435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29143238067626953, + "rewards_train/margins": 0.011447727680206299, + "rewards_train/rejected": -0.30288010835647583, + "step": 1032 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.331722259521484, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -14.928023338317871, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2425472736358643, + "rewards_train/margins": -0.37161993980407715, + "rewards_train/rejected": -0.8709273338317871, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -121.77958679199219, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -115.88921356201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47795867919921875, + "rewards_train/margins": 2.1109626293182373, + "rewards_train/rejected": -2.588921308517456, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -31.13074493408203, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -56.116004943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0380744934082031, + "rewards_train/margins": 1.9235260486602783, + "rewards_train/rejected": -2.9616005420684814, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -7.997697830200195, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -18.91596794128418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11226978152990341, + "rewards_train/margins": 0.4480770006775856, + "rewards_train/rejected": -0.560346782207489, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -157.8209228515625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -207.01190185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.032092571258545, + "rewards_train/margins": 3.46909761428833, + "rewards_train/rejected": -7.501190185546875, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -113.84368896484375, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -194.1619873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.134368896484375, + "rewards_train/margins": 2.98183012008667, + "rewards_train/rejected": -4.116199016571045, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -41.16416931152344, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -91.06977844238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8414169549942017, + "rewards_train/margins": 0.9155609607696533, + "rewards_train/rejected": -1.756977915763855, + "step": 1033 + }, + { + "epoch": 0.29, + "logps_train/chosen": -68.6551513671875, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -42.450164794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.33448487520217896, + "rewards_train/margins": 1.7670013308525085, + "rewards_train/rejected": -1.4325164556503296, + "step": 1033 + }, + { + "epoch": 0.29, + "learning_rate": 1.3601167859602977e-06, + "loss": 0.3568, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -5.801602840423584, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -11.281949996948242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008285284042358398, + "rewards_train/margins": 0.7011597156524658, + "rewards_train/rejected": -0.7094449996948242, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.6213974952697754, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -31.709247589111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02223525010049343, + "rewards_train/margins": 1.3056599851697683, + "rewards_train/rejected": -1.283424735069275, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -56.276588439941406, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -111.54891204833984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6526589393615723, + "rewards_train/margins": 3.302232265472412, + "rewards_train/rejected": -5.954891204833984, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -12.24764633178711, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -16.919857025146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7247646450996399, + "rewards_train/margins": 0.4953461289405823, + "rewards_train/rejected": -1.2201107740402222, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -121.49928283691406, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -169.345947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6999282836914062, + "rewards_train/margins": 0.8846664428710938, + "rewards_train/rejected": -4.5845947265625, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -21.627803802490234, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -2.5625, + "logps_train/rejected": -17.180910110473633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6846554279327393, + "rewards_train/margins": -0.2228144407272339, + "rewards_train/rejected": -1.4618409872055054, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -146.17599487304688, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -199.1815185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8175994753837585, + "rewards_train/margins": 1.8005524277687073, + "rewards_train/rejected": -2.618151903152466, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -14.163683891296387, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -6.172457695007324, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8632434010505676, + "rewards_train/margins": -0.7053726315498352, + "rewards_train/rejected": -0.15787076950073242, + "step": 1034 + }, + { + "epoch": 0.29, + "logps_train/chosen": -91.22642517089844, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -140.16018676757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5726425647735596, + "rewards_train/margins": 3.393376111984253, + "rewards_train/rejected": -4.9660186767578125, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -35.211830139160156, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -92.16934967041016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003816986223682761, + "rewards_train/margins": 3.370752000948414, + "rewards_train/rejected": -3.3669350147247314, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -131.19497680664062, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -218.04736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4194977283477783, + "rewards_train/margins": 6.535238981246948, + "rewards_train/rejected": -9.954736709594727, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -199.26239013671875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -123.32644653320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.62623929977417, + "rewards_train/margins": -2.193594455718994, + "rewards_train/rejected": -4.432644844055176, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -20.930391311645508, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -60.40962219238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3069608807563782, + "rewards_train/margins": 3.7854230999946594, + "rewards_train/rejected": -3.4784622192382812, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.989547729492188, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -13.908493995666504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7864547967910767, + "rewards_train/margins": 0.3528320789337158, + "rewards_train/rejected": -1.1392868757247925, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -38.59563446044922, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -34.13814163208008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6095634698867798, + "rewards_train/margins": 0.37925076484680176, + "rewards_train/rejected": -1.9888142347335815, + "step": 1035 + }, + { + "epoch": 0.29, + "logps_train/chosen": -64.9456558227539, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -64.71580505371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.4554344117641449, + "rewards_train/margins": -0.022985100746154785, + "rewards_train/rejected": 0.4784195125102997, + "step": 1035 + }, + { + "epoch": 0.29, + "learning_rate": 1.3576474743044454e-06, + "loss": 0.4824, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -156.3669891357422, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -202.3282470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5366989374160767, + "rewards_train/margins": 6.596125960350037, + "rewards_train/rejected": -8.132824897766113, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -93.48947143554688, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -98.16130065917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9989471435546875, + "rewards_train/margins": 2.442183017730713, + "rewards_train/rejected": -3.4411301612854004, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -214.70262145996094, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -205.68392944335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.370262145996094, + "rewards_train/margins": -2.451869010925293, + "rewards_train/rejected": -7.918393135070801, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -5.768134117126465, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -1.5859375, + "logps_train/rejected": -14.913228034973145, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32368841767311096, + "rewards_train/margins": 1.0090406835079193, + "rewards_train/rejected": -1.3327291011810303, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -26.543167114257812, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -15.132233619689941, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3918167352676392, + "rewards_train/margins": -1.0348433554172516, + "rewards_train/rejected": -0.3569733798503876, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -16.88205909729004, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -21.402956008911133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.844455897808075, + "rewards_train/margins": 1.0880271792411804, + "rewards_train/rejected": -1.9324830770492554, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -115.23344421386719, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -159.7637939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.223344564437866, + "rewards_train/margins": 5.203035116195679, + "rewards_train/rejected": -7.426379680633545, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -23.556026458740234, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -20.024131774902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5681026577949524, + "rewards_train/margins": 1.1639980673789978, + "rewards_train/rejected": -1.7321007251739502, + "step": 1036 + }, + { + "epoch": 0.29, + "logps_train/chosen": -27.37934112548828, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -16.951709747314453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.887934148311615, + "rewards_train/margins": -0.005263149738311768, + "rewards_train/rejected": -0.8826709985733032, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -5.769266605377197, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -11.580718994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017551660537719727, + "rewards_train/margins": 0.7248952388763428, + "rewards_train/rejected": -0.7424468994140625, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -17.254867553710938, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -35.58073425292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4942367672920227, + "rewards_train/margins": 1.570086658000946, + "rewards_train/rejected": -2.0643234252929688, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -37.490211486816406, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -119.26742553710938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6865211725234985, + "rewards_train/margins": -0.5597785711288452, + "rewards_train/rejected": -1.1267426013946533, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -154.1559295654297, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -234.76983642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.215592861175537, + "rewards_train/margins": 5.7613911628723145, + "rewards_train/rejected": -9.976984024047852, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -12.472077369689941, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -15.310002326965332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3909577429294586, + "rewards_train/margins": 0.27754250168800354, + "rewards_train/rejected": -0.6685002446174622, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -96.0348129272461, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -123.85906982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6534812450408936, + "rewards_train/margins": 0.8824260234832764, + "rewards_train/rejected": -4.53590726852417, + "step": 1037 + }, + { + "epoch": 0.29, + "logps_train/chosen": -113.04200744628906, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -172.9563446044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.404200792312622, + "rewards_train/margins": 5.99143385887146, + "rewards_train/rejected": -8.395634651184082, + "step": 1037 + }, + { + "epoch": 0.29, + "learning_rate": 1.3551756594954659e-06, + "loss": 0.5032, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -38.73976516723633, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -34.95323181152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.089601516723633, + "rewards_train/margins": -0.7880282402038574, + "rewards_train/rejected": -2.3015732765197754, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -8.574502944946289, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -8.47465991973877, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.30432531237602234, + "rewards_train/margins": -0.009984314441680908, + "rewards_train/rejected": -0.29434099793434143, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -0.12948168814182281, + "logps_train/ref_chosen": -0.349609375, + "logps_train/ref_rejected": -0.349609375, + "logps_train/rejected": -0.15565058588981628, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02201276831328869, + "rewards_train/margins": 0.002616889774799347, + "rewards_train/rejected": 0.019395878538489342, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -148.4044952392578, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -190.0313720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.49044942855835, + "rewards_train/margins": 1.312687873840332, + "rewards_train/rejected": -7.803137302398682, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -111.7897720336914, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -158.11703491210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6289772391319275, + "rewards_train/margins": 3.582726538181305, + "rewards_train/rejected": -4.211703777313232, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -178.85919189453125, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -228.869873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8859193325042725, + "rewards_train/margins": 4.801068544387817, + "rewards_train/rejected": -8.68698787689209, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -103.6379165649414, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -139.6770782470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9137916564941406, + "rewards_train/margins": 0.5539162158966064, + "rewards_train/rejected": -1.467707872390747, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -266.9632568359375, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -251.39683532714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.096325874328613, + "rewards_train/margins": -0.45664215087890625, + "rewards_train/rejected": -11.639683723449707, + "step": 1038 + }, + { + "epoch": 0.29, + "logps_train/chosen": -127.05449676513672, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -177.08587646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5554497241973877, + "rewards_train/margins": 2.7531378269195557, + "rewards_train/rejected": -4.308587551116943, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -20.059978485107422, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -34.09684753417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2184978574514389, + "rewards_train/margins": 1.8099368959665298, + "rewards_train/rejected": -2.0284347534179688, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -107.99197387695312, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -186.11074829101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8991974592208862, + "rewards_train/margins": 5.1118775606155396, + "rewards_train/rejected": -7.011075019836426, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -90.51828002929688, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -128.95457458496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2518280744552612, + "rewards_train/margins": 1.6936293840408325, + "rewards_train/rejected": -2.9454574584960938, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -50.6102294921875, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -32.851749420166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3985230922698975, + "rewards_train/margins": 0.3210268020629883, + "rewards_train/rejected": -2.7195498943328857, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.504148483276367, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -18.188167572021484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6191648840904236, + "rewards_train/margins": 0.5309018492698669, + "rewards_train/rejected": -1.1500667333602905, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -32.68352508544922, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -53.35368347167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9058525562286377, + "rewards_train/margins": 0.8295159339904785, + "rewards_train/rejected": -2.735368490219116, + "step": 1039 + }, + { + "epoch": 0.29, + "logps_train/chosen": -30.974239349365234, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -22.374265670776367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3474239110946655, + "rewards_train/margins": 0.3462526798248291, + "rewards_train/rejected": -1.6936765909194946, + "step": 1039 + }, + { + "epoch": 0.29, + "learning_rate": 1.3527013588334413e-06, + "loss": 0.4075, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -68.24855041503906, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -40.95970916748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9748550653457642, + "rewards_train/margins": 0.10861599445343018, + "rewards_train/rejected": -2.0834710597991943, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -6.593433380126953, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -6.162541389465332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028156662359833717, + "rewards_train/margins": 0.24128581024706364, + "rewards_train/rejected": -0.21312914788722992, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -119.30290985107422, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -202.31619262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.130290985107422, + "rewards_train/margins": 5.6013288497924805, + "rewards_train/rejected": -8.731619834899902, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -188.52297973632812, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -141.75173950195312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.1522979736328125, + "rewards_train/margins": -3.077123999595642, + "rewards_train/rejected": -1.0751739740371704, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -131.44924926757812, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -304.5433349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8449249267578125, + "rewards_train/margins": 10.509408950805664, + "rewards_train/rejected": -11.354333877563477, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.616429328918457, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -17.41107940673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11164293438196182, + "rewards_train/margins": 0.8044650182127953, + "rewards_train/rejected": -0.9161079525947571, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -6.080010890960693, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -17.977508544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27831360697746277, + "rewards_train/margins": 0.5819372832775116, + "rewards_train/rejected": -0.8602508902549744, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -28.667970657348633, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -30.251869201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.654297113418579, + "rewards_train/margins": 0.9677648544311523, + "rewards_train/rejected": -2.6220619678497314, + "step": 1040 + }, + { + "epoch": 0.29, + "logps_train/chosen": -96.36150360107422, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -118.09262084960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11384963989257812, + "rewards_train/margins": 0.2731117308139801, + "rewards_train/rejected": -0.15926209092140198, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -8.40881633758545, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -36.26955032348633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48306915163993835, + "rewards_train/margins": 2.137635976076126, + "rewards_train/rejected": -2.6207051277160645, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -10.358199119567871, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -6.441230773925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.774882435798645, + "rewards_train/margins": -0.48388436436653137, + "rewards_train/rejected": -0.29099807143211365, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -101.91012573242188, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -105.1358642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.341012716293335, + "rewards_train/margins": 0.3225736618041992, + "rewards_train/rejected": -3.663586378097534, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -25.554542541503906, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -116.67765808105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1617043018341064, + "rewards_train/margins": 2.006061553955078, + "rewards_train/rejected": -3.1677658557891846, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -6.540433406829834, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -38.0533447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19779334962368011, + "rewards_train/margins": 2.11379112303257, + "rewards_train/rejected": -2.31158447265625, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -67.28588104248047, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -30.99512481689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3285880982875824, + "rewards_train/margins": 1.2396743595600128, + "rewards_train/rejected": -1.5682624578475952, + "step": 1041 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.8055598735809326, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -8.530229568481445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13211849331855774, + "rewards_train/margins": 0.15527945756912231, + "rewards_train/rejected": -0.28739795088768005, + "step": 1041 + }, + { + "epoch": 0.29, + "learning_rate": 1.350224589635853e-06, + "loss": 0.5486, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -7.038479804992676, + "logps_train/ref_chosen": -0.26953125, + "logps_train/ref_rejected": -0.26953125, + "logps_train/rejected": -6.983414173126221, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6768948435783386, + "rewards_train/margins": -0.0055065155029296875, + "rewards_train/rejected": -0.6713883280754089, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -106.86920166015625, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -139.70144653320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.886920154094696, + "rewards_train/margins": 1.583224594593048, + "rewards_train/rejected": -2.470144748687744, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -139.83395385742188, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -168.09954833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.283395528793335, + "rewards_train/margins": 3.026559591293335, + "rewards_train/rejected": -5.30995512008667, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -21.0883846282959, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -27.906970977783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2275885343551636, + "rewards_train/margins": 1.0912336111068726, + "rewards_train/rejected": -2.318822145462036, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -41.86370086669922, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -55.08819580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.948870062828064, + "rewards_train/margins": 1.059949517250061, + "rewards_train/rejected": -3.008819580078125, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -138.42384338378906, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -138.50360107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2576156556606293, + "rewards_train/margins": 0.007975757122039795, + "rewards_train/rejected": 0.24963989853858948, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -46.88966751098633, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -124.98011016845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28896674513816833, + "rewards_train/margins": 0.9590442478656769, + "rewards_train/rejected": -1.2480109930038452, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -156.28314208984375, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -192.3225555419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1283142566680908, + "rewards_train/margins": 0.6039413213729858, + "rewards_train/rejected": -1.7322555780410767, + "step": 1042 + }, + { + "epoch": 0.29, + "logps_train/chosen": -32.18128967285156, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -106.69281005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.618129014968872, + "rewards_train/margins": 2.4011518955230713, + "rewards_train/rejected": -4.019280910491943, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.458358764648438, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -27.784757614135742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4770858883857727, + "rewards_train/margins": 0.08888989686965942, + "rewards_train/rejected": -0.5659757852554321, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.479440689086914, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -22.50038719177246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1416940689086914, + "rewards_train/margins": 1.4864696264266968, + "rewards_train/rejected": -1.6281636953353882, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -13.312477111816406, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -17.510669708251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9406227469444275, + "rewards_train/margins": 0.12294429540634155, + "rewards_train/rejected": -1.063567042350769, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -146.62806701660156, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -111.43997955322266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.912806749343872, + "rewards_train/margins": -1.6188087463378906, + "rewards_train/rejected": -2.2939980030059814, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -86.77494812011719, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -120.87641143798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.227494955062866, + "rewards_train/margins": 1.760146141052246, + "rewards_train/rejected": -3.9876410961151123, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -27.118133544921875, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -27.183860778808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8743133544921875, + "rewards_train/margins": 0.006572723388671875, + "rewards_train/rejected": -0.8808860778808594, + "step": 1043 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.981986999511719, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -10.644668579101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09819870442152023, + "rewards_train/margins": 0.4350181892514229, + "rewards_train/rejected": -0.5332168936729431, + "step": 1043 + }, + { + "epoch": 0.29, + "learning_rate": 1.34774536923746e-06, + "loss": 0.4805, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -93.8194580078125, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -161.1231689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03194580227136612, + "rewards_train/margins": 3.530371092259884, + "rewards_train/rejected": -3.56231689453125, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -138.36363220214844, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -168.77301025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.636363506317139, + "rewards_train/margins": 1.4409375190734863, + "rewards_train/rejected": -6.077301025390625, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -25.928953170776367, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -20.9454402923584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8678953051567078, + "rewards_train/margins": 0.12039875984191895, + "rewards_train/rejected": -0.9882940649986267, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -48.76353454589844, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -86.55525207519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5486465692520142, + "rewards_train/margins": 1.0041717886924744, + "rewards_train/rejected": -0.4555252194404602, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -8.892919540405273, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -20.804580688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0419580452144146, + "rewards_train/margins": 0.8411661498248577, + "rewards_train/rejected": -0.7992081046104431, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -80.45948791503906, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -40.518531799316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.92094886302948, + "rewards_train/margins": 0.8809043169021606, + "rewards_train/rejected": -2.8018531799316406, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -110.92277526855469, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -1.765625, + "logps_train/rejected": -21.44352912902832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7422776222229004, + "rewards_train/margins": -0.7744871377944946, + "rewards_train/rejected": -1.9677904844284058, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -163.76348876953125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -200.7802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.176348924636841, + "rewards_train/margins": 2.701678514480591, + "rewards_train/rejected": -5.878027439117432, + "step": 1044 + }, + { + "epoch": 0.29, + "logps_train/chosen": -159.1712646484375, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -261.15191650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4671266078948975, + "rewards_train/margins": 4.348065137863159, + "rewards_train/rejected": -7.815191745758057, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -16.539278030395508, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -16.262956619262695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0195528268814087, + "rewards_train/margins": 0.09736788272857666, + "rewards_train/rejected": -1.1169207096099854, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -148.6436767578125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -202.9011688232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.964367628097534, + "rewards_train/margins": 3.575749158859253, + "rewards_train/rejected": -7.540116786956787, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -46.718589782714844, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -71.52222442626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.321859121322632, + "rewards_train/margins": 2.042863607406616, + "rewards_train/rejected": -5.364722728729248, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -162.87095642089844, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -147.63473510742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.437095642089844, + "rewards_train/margins": 0.07637786865234375, + "rewards_train/rejected": -6.5134735107421875, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -31.260601043701172, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -50.79026794433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.563560128211975, + "rewards_train/margins": 2.4404667615890503, + "rewards_train/rejected": -4.004026889801025, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -126.16265869140625, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -156.07638549804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.266265869140625, + "rewards_train/margins": 4.1413726806640625, + "rewards_train/rejected": -7.4076385498046875, + "step": 1045 + }, + { + "epoch": 0.29, + "logps_train/chosen": -0.5952409505844116, + "logps_train/ref_chosen": -0.29296875, + "logps_train/ref_rejected": -0.29296875, + "logps_train/rejected": -0.6030668616294861, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030227219685912132, + "rewards_train/margins": 0.0007825922220945358, + "rewards_train/rejected": -0.031009811908006668, + "step": 1045 + }, + { + "epoch": 0.29, + "learning_rate": 1.345263714990176e-06, + "loss": 0.3365, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -20.672155380249023, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -24.590356826782227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0297155380249023, + "rewards_train/margins": 0.17307019233703613, + "rewards_train/rejected": -1.2027857303619385, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -179.00057983398438, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -127.592529296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.300057888031006, + "rewards_train/margins": -0.7908048629760742, + "rewards_train/rejected": -5.509253025054932, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -4.1874918937683105, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -0.2197265625, + "logps_train/rejected": -0.6852521896362305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1515616923570633, + "rewards_train/margins": -0.10500912740826607, + "rewards_train/rejected": -0.046552564948797226, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -20.416399002075195, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -87.33880615234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9728899002075195, + "rewards_train/margins": -0.489009290933609, + "rewards_train/rejected": -0.4838806092739105, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -160.096435546875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -180.2405242919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1096436977386475, + "rewards_train/margins": 1.7144086360931396, + "rewards_train/rejected": -4.824052333831787, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -114.893310546875, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -51.090293884277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.989331007003784, + "rewards_train/margins": -0.780301570892334, + "rewards_train/rejected": -2.20902943611145, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -157.4749755859375, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -219.0, + "logps_train/rejected": -282.4609680175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.597497463226318, + "rewards_train/margins": 0.7485995292663574, + "rewards_train/rejected": -6.346096992492676, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -34.25876235961914, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -86.4316635131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.11962628364563, + "rewards_train/margins": 0.5485401153564453, + "rewards_train/rejected": -2.668166399002075, + "step": 1046 + }, + { + "epoch": 0.29, + "logps_train/chosen": -109.73438262939453, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -179.79957580566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2265617400407791, + "rewards_train/margins": 6.856519415974617, + "rewards_train/rejected": -6.629957675933838, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -21.686901092529297, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -2.09375, + "logps_train/rejected": -6.0123066902160645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0936901569366455, + "rewards_train/margins": -0.7018344700336456, + "rewards_train/rejected": -0.3918556869029999, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -3.423206329345703, + "logps_train/ref_chosen": -0.515625, + "logps_train/ref_rejected": -0.515625, + "logps_train/rejected": -3.708629846572876, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2907581329345703, + "rewards_train/margins": 0.028542369604110718, + "rewards_train/rejected": -0.31930050253868103, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -26.42057228088379, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -63.541385650634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7795572280883789, + "rewards_train/margins": 1.6995813846588135, + "rewards_train/rejected": -2.4791386127471924, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -106.92960357666016, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -129.43716430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0429604053497314, + "rewards_train/margins": 1.250756025314331, + "rewards_train/rejected": -3.2937164306640625, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -115.41240692138672, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -225.73928833007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8412407636642456, + "rewards_train/margins": 7.53268826007843, + "rewards_train/rejected": -9.373929023742676, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -45.27662658691406, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -23.601011276245117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.890162706375122, + "rewards_train/margins": -1.2925615906715393, + "rewards_train/rejected": -0.5976011157035828, + "step": 1047 + }, + { + "epoch": 0.29, + "logps_train/chosen": -117.42432403564453, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -159.51393127441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.642432451248169, + "rewards_train/margins": 1.6589608192443848, + "rewards_train/rejected": -3.3013932704925537, + "step": 1047 + }, + { + "epoch": 0.29, + "learning_rate": 1.3427796442629497e-06, + "loss": 0.5991, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -20.528308868408203, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -26.772111892700195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7153308987617493, + "rewards_train/margins": 0.5743803381919861, + "rewards_train/rejected": -1.2897112369537354, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -123.92915344238281, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -181.74453735351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.592915415763855, + "rewards_train/margins": 2.7815386056900024, + "rewards_train/rejected": -4.374454021453857, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -190.94874572753906, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -198.7254638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.244874954223633, + "rewards_train/margins": 0.827672004699707, + "rewards_train/rejected": -9.07254695892334, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -225.59429931640625, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -230.4791259765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.109430313110352, + "rewards_train/margins": -0.461517333984375, + "rewards_train/rejected": -10.647912979125977, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.140880584716797, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -45.41612243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7187755703926086, + "rewards_train/margins": 1.5228366255760193, + "rewards_train/rejected": -2.241612195968628, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.40668487548828, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -33.110687255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7906684875488281, + "rewards_train/margins": 1.7297751903533936, + "rewards_train/rejected": -2.5204436779022217, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -119.23002624511719, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -208.13241577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.573002576828003, + "rewards_train/margins": 4.890239000320435, + "rewards_train/rejected": -8.463241577148438, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -114.63589477539062, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -122.70785522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9635895490646362, + "rewards_train/margins": 0.7571960687637329, + "rewards_train/rejected": -2.720785617828369, + "step": 1048 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.892561435699463, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -24.365413665771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10253739356994629, + "rewards_train/margins": 2.0090041160583496, + "rewards_train/rejected": -2.111541509628296, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -32.904998779296875, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -11.349294662475586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3654998540878296, + "rewards_train/margins": -0.9711953699588776, + "rewards_train/rejected": -0.394304484128952, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -19.685075759887695, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -26.173337936401367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6997575759887695, + "rewards_train/margins": 0.4863262176513672, + "rewards_train/rejected": -2.1860837936401367, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -34.861576080322266, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -33.037452697753906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.061157703399658, + "rewards_train/margins": -0.16366243362426758, + "rewards_train/rejected": -1.8974952697753906, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -23.767913818359375, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -27.43894386291504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6517914533615112, + "rewards_train/margins": 0.4327279329299927, + "rewards_train/rejected": -2.084519386291504, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -163.52572631835938, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -274.9061279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.252572536468506, + "rewards_train/margins": 8.238040447235107, + "rewards_train/rejected": -12.490612983703613, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -74.39409637451172, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -54.97960662841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5644097328186035, + "rewards_train/margins": 0.6023011207580566, + "rewards_train/rejected": -4.16671085357666, + "step": 1049 + }, + { + "epoch": 0.29, + "logps_train/chosen": -34.35905838012695, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -49.52899932861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.023405909538269, + "rewards_train/margins": 1.5419939756393433, + "rewards_train/rejected": -2.5653998851776123, + "step": 1049 + }, + { + "epoch": 0.29, + "learning_rate": 1.340293174441643e-06, + "loss": 0.3983, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -6.89909553527832, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -9.497252464294434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05865955352783203, + "rewards_train/margins": 0.04106569290161133, + "rewards_train/rejected": -0.09972524642944336, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -13.58696174621582, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -1.203125, + "logps_train/rejected": -5.132086753845215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31494617462158203, + "rewards_train/margins": 0.07795000076293945, + "rewards_train/rejected": -0.3928961753845215, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -5.388001918792725, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -54.29154968261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30911269783973694, + "rewards_train/margins": 3.320042222738266, + "rewards_train/rejected": -3.629154920578003, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -211.06027221679688, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -248.79598999023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.906027317047119, + "rewards_train/margins": 0.17357158660888672, + "rewards_train/rejected": -5.079598903656006, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -32.14111328125, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -189.11122131347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.601611316204071, + "rewards_train/margins": 4.70951110124588, + "rewards_train/rejected": -5.311122417449951, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -123.82728576660156, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -119.55253601074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08272857964038849, + "rewards_train/margins": 2.572525069117546, + "rewards_train/rejected": -2.6552536487579346, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -168.63702392578125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -163.67919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.613702297210693, + "rewards_train/margins": 1.05421781539917, + "rewards_train/rejected": -7.667920112609863, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -24.943355560302734, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -88.27080535888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6318355798721313, + "rewards_train/margins": 3.4702452421188354, + "rewards_train/rejected": -4.102080821990967, + "step": 1050 + }, + { + "epoch": 0.29, + "logps_train/chosen": -148.9725341796875, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -193.9165496826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.99725341796875, + "rewards_train/margins": 1.694401741027832, + "rewards_train/rejected": -5.691655158996582, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -11.838004112243652, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -22.334815979003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5525504350662231, + "rewards_train/margins": 1.4574936628341675, + "rewards_train/rejected": -2.0100440979003906, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -165.75949096679688, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -158.96090698242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.375949382781982, + "rewards_train/margins": -0.9298586845397949, + "rewards_train/rejected": -4.4460906982421875, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -145.8869171142578, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -159.91378784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.738691806793213, + "rewards_train/margins": 0.852686882019043, + "rewards_train/rejected": -5.591378688812256, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -155.03392028808594, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -114.88438415527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9033920764923096, + "rewards_train/margins": -0.41495370864868164, + "rewards_train/rejected": -2.488438367843628, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -46.0959358215332, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -61.69560623168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0095936059951782, + "rewards_train/margins": 1.2599669694900513, + "rewards_train/rejected": -2.2695605754852295, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -60.307106018066406, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -32.43949890136719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0557106733322144, + "rewards_train/margins": 0.3757392168045044, + "rewards_train/rejected": -1.4314498901367188, + "step": 1051 + }, + { + "epoch": 0.29, + "logps_train/chosen": -71.19659423828125, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -198.7074432373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8196594715118408, + "rewards_train/margins": 7.251085042953491, + "rewards_train/rejected": -9.070744514465332, + "step": 1051 + }, + { + "epoch": 0.29, + "learning_rate": 1.3378043229289075e-06, + "loss": 0.3797, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -235.74786376953125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -239.11270141601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.374786376953125, + "rewards_train/margins": 0.23648357391357422, + "rewards_train/rejected": -11.6112699508667, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -6.091569900512695, + "logps_train/ref_chosen": -0.53125, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -23.67377281188965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5560320019721985, + "rewards_train/margins": 1.36134535074234, + "rewards_train/rejected": -1.9173773527145386, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -254.00048828125, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -233.5689239501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.300048828125, + "rewards_train/margins": 0.3568439483642578, + "rewards_train/rejected": -8.656892776489258, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -62.512123107910156, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -51.7661247253418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8012123107910156, + "rewards_train/margins": 1.2629001140594482, + "rewards_train/rejected": -2.064112424850464, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -18.476591110229492, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -27.845836639404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8226591348648071, + "rewards_train/margins": 0.32442450523376465, + "rewards_train/rejected": -1.1470836400985718, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -26.334623336791992, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -26.090656280517578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.589712381362915, + "rewards_train/margins": -0.08689677715301514, + "rewards_train/rejected": -1.5028156042099, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -32.70741271972656, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -17.233680725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.145741268992424, + "rewards_train/margins": 0.2651268094778061, + "rewards_train/rejected": -0.4108680784702301, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -9.85619831085205, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -13.329316139221191, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4199948310852051, + "rewards_train/margins": 0.459811806678772, + "rewards_train/rejected": -0.879806637763977, + "step": 1052 + }, + { + "epoch": 0.29, + "logps_train/chosen": -104.5887451171875, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -37.7572021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39112550020217896, + "rewards_train/margins": 2.879345715045929, + "rewards_train/rejected": -2.48822021484375, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -19.3359375, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -25.536428451538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20859375596046448, + "rewards_train/margins": 0.6075491011142731, + "rewards_train/rejected": -0.8161428570747375, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -110.57342529296875, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -219.12612915039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4073426723480225, + "rewards_train/margins": 7.055270433425903, + "rewards_train/rejected": -10.462613105773926, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -69.83470916748047, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -184.37188720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1334710121154785, + "rewards_train/margins": 5.25371789932251, + "rewards_train/rejected": -7.387188911437988, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -154.46676635742188, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -145.3832244873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5466766357421875, + "rewards_train/margins": 3.141645908355713, + "rewards_train/rejected": -3.6883225440979004, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -119.27925872802734, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -161.42413330078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.227926015853882, + "rewards_train/margins": -0.18551254272460938, + "rewards_train/rejected": -2.0424134731292725, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -106.181884765625, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -133.21095275878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8181885480880737, + "rewards_train/margins": 4.002906918525696, + "rewards_train/rejected": -5.8210954666137695, + "step": 1053 + }, + { + "epoch": 0.29, + "logps_train/chosen": -2.454819917678833, + "logps_train/ref_chosen": -1.3125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -29.153345108032227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11423199623823166, + "rewards_train/margins": 1.651102490723133, + "rewards_train/rejected": -1.7653344869613647, + "step": 1053 + }, + { + "epoch": 0.29, + "learning_rate": 1.3353131071440659e-06, + "loss": 0.3408, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -93.557861328125, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -133.6846160888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.855786085128784, + "rewards_train/margins": 1.4626758098602295, + "rewards_train/rejected": -4.318461894989014, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -76.28201293945312, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -113.9775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0782012939453125, + "rewards_train/margins": 3.1695525646209717, + "rewards_train/rejected": -3.247753858566284, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -122.02548217773438, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -211.1955108642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8025482892990112, + "rewards_train/margins": 6.91700279712677, + "rewards_train/rejected": -8.719551086425781, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -100.93649291992188, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -157.54624938964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.543649435043335, + "rewards_train/margins": 2.5609757900238037, + "rewards_train/rejected": -5.104625225067139, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -33.15720748901367, + "logps_train/ref_chosen": -29.875, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -31.163837432861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32822075486183167, + "rewards_train/margins": 2.0287879407405853, + "rewards_train/rejected": -2.357008695602417, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -17.317733764648438, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -33.62005615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7192733883857727, + "rewards_train/margins": 0.855232298374176, + "rewards_train/rejected": -1.5745056867599487, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -154.53753662109375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -161.28298950195312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.453753709793091, + "rewards_train/margins": -0.02545475959777832, + "rewards_train/rejected": -2.4282989501953125, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -84.55145263671875, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -73.449951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.055145263671875, + "rewards_train/margins": 1.1648499965667725, + "rewards_train/rejected": -2.2199952602386475, + "step": 1054 + }, + { + "epoch": 0.29, + "logps_train/chosen": -115.88948822021484, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -111.85581970214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5889488458633423, + "rewards_train/margins": 0.3466331958770752, + "rewards_train/rejected": -1.9355820417404175, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -0.9682012796401978, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -6.085330963134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13442987203598022, + "rewards_train/margins": 0.31171296536922455, + "rewards_train/rejected": -0.17728309333324432, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -178.21844482421875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -134.52581787109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.721844673156738, + "rewards_train/margins": -1.3692626953125, + "rewards_train/rejected": -4.352581977844238, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -158.45602416992188, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -157.13543701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.345602512359619, + "rewards_train/margins": 4.117941379547119, + "rewards_train/rejected": -6.463543891906738, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -168.70318603515625, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -167.53390502929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.829681396484375, + "rewards_train/margins": -0.1169281005859375, + "rewards_train/rejected": 0.9466094970703125, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -107.54900360107422, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -85.56578826904297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9549005031585693, + "rewards_train/margins": -0.8483216762542725, + "rewards_train/rejected": -2.106578826904297, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -69.00764465332031, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -35.80305480957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.175764560699463, + "rewards_train/margins": -1.1954590678215027, + "rewards_train/rejected": -0.9803054928779602, + "step": 1055 + }, + { + "epoch": 0.29, + "logps_train/chosen": -15.840179443359375, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -56.68836212158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9933929443359375, + "rewards_train/margins": 0.8004432916641235, + "rewards_train/rejected": -1.793836236000061, + "step": 1055 + }, + { + "epoch": 0.3, + "learning_rate": 1.3328195445229867e-06, + "loss": 0.5164, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -29.835298538208008, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -35.630409240722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7960298657417297, + "rewards_train/margins": 0.07951104640960693, + "rewards_train/rejected": -0.8755409121513367, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -63.918216705322266, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -101.97755432128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.316821813583374, + "rewards_train/margins": 0.48093366622924805, + "rewards_train/rejected": -2.797755479812622, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -132.08316040039062, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -170.1339569091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7083160281181335, + "rewards_train/margins": 4.505079567432404, + "rewards_train/rejected": -5.213395595550537, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -222.72897338867188, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -253.30532836914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.272897720336914, + "rewards_train/margins": 1.7576351165771484, + "rewards_train/rejected": -10.030532836914062, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -25.461124420166016, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -21.265695571899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1398624181747437, + "rewards_train/margins": 0.005457162857055664, + "rewards_train/rejected": -1.1453195810317993, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -34.59113693237305, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -17.273208618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6341137290000916, + "rewards_train/margins": 0.5244571566581726, + "rewards_train/rejected": -1.1585708856582642, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -19.712745666503906, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -61.46901321411133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7775245904922485, + "rewards_train/margins": 0.844376802444458, + "rewards_train/rejected": -1.6219013929367065, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.46950340270996, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -60.29844665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1719503402709961, + "rewards_train/margins": 3.882894515991211, + "rewards_train/rejected": -4.054844856262207, + "step": 1056 + }, + { + "epoch": 0.3, + "logps_train/chosen": -9.53720474243164, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -12.681012153625488, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10997047275304794, + "rewards_train/margins": 0.08938074856996536, + "rewards_train/rejected": -0.1993512213230133, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -42.3917236328125, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -78.91960906982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9891723990440369, + "rewards_train/margins": 2.402788460254669, + "rewards_train/rejected": -3.391960859298706, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -5.290191650390625, + "logps_train/ref_chosen": -1.640625, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -5.087607383728027, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.36495667695999146, + "rewards_train/margins": -0.08119592070579529, + "rewards_train/rejected": -0.28376075625419617, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -163.85702514648438, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -134.50982666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.085702419281006, + "rewards_train/margins": 1.0652804374694824, + "rewards_train/rejected": -5.150982856750488, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -12.234964370727539, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -2.59375, + "logps_train/rejected": -2.295788288116455, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6922464370727539, + "rewards_train/margins": -0.7220426090061665, + "rewards_train/rejected": 0.029796171933412552, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -22.12136459350586, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -33.81884002685547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4215115308761597, + "rewards_train/margins": -0.9521275162696838, + "rewards_train/rejected": -0.46938401460647583, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.714868545532227, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -39.921051025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7402368783950806, + "rewards_train/margins": 1.3643683195114136, + "rewards_train/rejected": -2.104605197906494, + "step": 1057 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.829139709472656, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -18.481895446777344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1204140186309814, + "rewards_train/margins": -0.9472244679927826, + "rewards_train/rejected": -0.17318955063819885, + "step": 1057 + }, + { + "epoch": 0.3, + "learning_rate": 1.3303236525179647e-06, + "loss": 0.5317, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -138.1134796142578, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -289.62518310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0613479614257812, + "rewards_train/margins": 10.801170349121094, + "rewards_train/rejected": -12.862518310546875, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.912479400634766, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -48.12485885620117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3349980115890503, + "rewards_train/margins": -0.3475121259689331, + "rewards_train/rejected": -0.9874858856201172, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -198.24612426757812, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -218.09100341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8246123790740967, + "rewards_train/margins": 1.8844878673553467, + "rewards_train/rejected": -5.709100246429443, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -70.85494232177734, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -37.19659423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3854942321777344, + "rewards_train/margins": 1.7216651439666748, + "rewards_train/rejected": -2.107159376144409, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -104.23834991455078, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -142.58914184570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.973834991455078, + "rewards_train/margins": 2.6850790977478027, + "rewards_train/rejected": -5.658914089202881, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -254.53524780273438, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -290.17498779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.653525352478027, + "rewards_train/margins": 1.9639739990234375, + "rewards_train/rejected": -11.617499351501465, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -92.83303833007812, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -159.74273681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2333037853240967, + "rewards_train/margins": 4.64096999168396, + "rewards_train/rejected": -6.874273777008057, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -30.139171600341797, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -49.511497497558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1764172315597534, + "rewards_train/margins": -0.3752674460411072, + "rewards_train/rejected": -0.8011497855186462, + "step": 1058 + }, + { + "epoch": 0.3, + "logps_train/chosen": -108.74715423583984, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -58.68620300292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3747154176235199, + "rewards_train/margins": 2.0689050257205963, + "rewards_train/rejected": -2.443620443344116, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -131.697998046875, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -282.8101806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2197998762130737, + "rewards_train/margins": 10.56121838092804, + "rewards_train/rejected": -11.781018257141113, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -210.7362823486328, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -247.79457092285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3736283779144287, + "rewards_train/margins": 8.855828523635864, + "rewards_train/rejected": -12.229456901550293, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -17.004547119140625, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -22.379825592041016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7942047119140625, + "rewards_train/margins": 1.0672153234481812, + "rewards_train/rejected": -1.8614200353622437, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -117.13687896728516, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -120.49088287353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9136878848075867, + "rewards_train/margins": 3.8854005932807922, + "rewards_train/rejected": -4.799088478088379, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -90.51416015625, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -122.04029846191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2514160871505737, + "rewards_train/margins": 1.7026137113571167, + "rewards_train/rejected": -2.9540297985076904, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -261.28790283203125, + "logps_train/ref_chosen": -190.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -177.19296264648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.128790378570557, + "rewards_train/margins": -0.0594940185546875, + "rewards_train/rejected": -7.069296360015869, + "step": 1059 + }, + { + "epoch": 0.3, + "logps_train/chosen": -200.00697326660156, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -210.97943115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.1006975173950195, + "rewards_train/margins": 2.1972455978393555, + "rewards_train/rejected": -7.297943115234375, + "step": 1059 + }, + { + "epoch": 0.3, + "learning_rate": 1.3278254485975974e-06, + "loss": 0.2327, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -129.71263122558594, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -117.71722412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.371263265609741, + "rewards_train/margins": 1.5004591941833496, + "rewards_train/rejected": -3.871722459793091, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -74.15850830078125, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -105.72865295410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8158508539199829, + "rewards_train/margins": 2.107014536857605, + "rewards_train/rejected": -2.922865390777588, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -56.08496856689453, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -79.75152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.80849689245224, + "rewards_train/margins": 0.816655695438385, + "rewards_train/rejected": -1.625152587890625, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -4.518652439117432, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -11.147834777832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.023740245029330254, + "rewards_train/margins": 0.20354323275387287, + "rewards_train/rejected": -0.22728347778320312, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.26951789855957, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -26.506084442138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02304821088910103, + "rewards_train/margins": 0.7986566908657551, + "rewards_train/rejected": -0.775608479976654, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -1.1483235359191895, + "logps_train/ref_chosen": -0.33984375, + "logps_train/ref_rejected": -0.33984375, + "logps_train/rejected": -1.158665657043457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08084797859191895, + "rewards_train/margins": 0.0010342150926589966, + "rewards_train/rejected": -0.08188219368457794, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -107.73568725585938, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -190.8638916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9735687971115112, + "rewards_train/margins": 2.7128206491470337, + "rewards_train/rejected": -4.686389446258545, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -23.807950973510742, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -21.705564498901367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.237045168876648, + "rewards_train/margins": 0.6506987810134888, + "rewards_train/rejected": -1.8877439498901367, + "step": 1060 + }, + { + "epoch": 0.3, + "logps_train/chosen": -13.559676170349121, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -14.980327606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02471761777997017, + "rewards_train/margins": 0.2358151488006115, + "rewards_train/rejected": -0.26053276658058167, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -166.99383544921875, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -169.61863708496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.649383544921875, + "rewards_train/margins": 1.462480068206787, + "rewards_train/rejected": -6.111863613128662, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -10.662156105041504, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -53.92486572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5912156105041504, + "rewards_train/margins": 2.4262709617614746, + "rewards_train/rejected": -3.017486572265625, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -47.394287109375, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -158.15243530273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.839428722858429, + "rewards_train/margins": 5.975814998149872, + "rewards_train/rejected": -6.815243721008301, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -26.001178741455078, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -62.27602005004883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9626178741455078, + "rewards_train/margins": 2.5399842262268066, + "rewards_train/rejected": -3.5026021003723145, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -246.92242431640625, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -241.50216674804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.992242813110352, + "rewards_train/margins": -0.14202594757080078, + "rewards_train/rejected": -8.85021686553955, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -24.957162857055664, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -110.88712310791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6957163214683533, + "rewards_train/margins": 0.5929960608482361, + "rewards_train/rejected": -1.2887123823165894, + "step": 1061 + }, + { + "epoch": 0.3, + "logps_train/chosen": -28.91486358642578, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -39.20073699951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.797736406326294, + "rewards_train/margins": 1.016087293624878, + "rewards_train/rejected": -2.813823699951172, + "step": 1061 + }, + { + "epoch": 0.3, + "learning_rate": 1.3253249502466646e-06, + "loss": 0.331, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -17.822988510131836, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -42.1859130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8635488748550415, + "rewards_train/margins": 1.3175424337387085, + "rewards_train/rejected": -2.18109130859375, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -27.24574851989746, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -21.80064582824707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2995748519897461, + "rewards_train/margins": 0.01798972487449646, + "rewards_train/rejected": -0.31756457686424255, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -19.969036102294922, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -39.62803649902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.528153657913208, + "rewards_train/margins": 0.7846500873565674, + "rewards_train/rejected": -2.3128037452697754, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -100.85102081298828, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -186.88919067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.935102105140686, + "rewards_train/margins": 7.5038169622421265, + "rewards_train/rejected": -9.438919067382812, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -108.75505828857422, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -222.2413330078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4755058288574219, + "rewards_train/margins": 8.348628044128418, + "rewards_train/rejected": -9.82413387298584, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -126.62886047363281, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -124.24512481689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1628860235214233, + "rewards_train/margins": 0.21162652969360352, + "rewards_train/rejected": -1.3745125532150269, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -0.17204809188842773, + "logps_train/ref_chosen": -0.2001953125, + "logps_train/ref_rejected": -0.2001953125, + "logps_train/rejected": -0.17316600680351257, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.002814722014591098, + "rewards_train/margins": 0.00011179130524396896, + "rewards_train/rejected": 0.002702930709347129, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -26.79730987548828, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -31.522598266601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7047309875488281, + "rewards_train/margins": 0.8725289106369019, + "rewards_train/rejected": -1.57725989818573, + "step": 1062 + }, + { + "epoch": 0.3, + "logps_train/chosen": -14.589573860168457, + "logps_train/ref_chosen": -1.765625, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -19.862018585205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2823948860168457, + "rewards_train/margins": 0.41474449634552, + "rewards_train/rejected": -1.6971393823623657, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -108.96568298339844, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -257.66143798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3465683460235596, + "rewards_train/margins": 7.219575643539429, + "rewards_train/rejected": -10.566143989562988, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -116.89318084716797, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -210.5483856201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.089318037033081, + "rewards_train/margins": 6.415520906448364, + "rewards_train/rejected": -9.504838943481445, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -143.47634887695312, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -191.4992218017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.84763503074646, + "rewards_train/margins": 3.402287244796753, + "rewards_train/rejected": -6.249922275543213, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -20.142179489135742, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -20.19835090637207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6892179846763611, + "rewards_train/margins": 0.0056171417236328125, + "rewards_train/rejected": -0.6948351263999939, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -55.46094512939453, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -48.756561279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14609451591968536, + "rewards_train/margins": 2.992061659693718, + "rewards_train/rejected": -3.1381561756134033, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -14.417399406433105, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -2.484375, + "logps_train/rejected": -12.432205200195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.335489958524704, + "rewards_train/margins": 0.6592930853366852, + "rewards_train/rejected": -0.9947830438613892, + "step": 1063 + }, + { + "epoch": 0.3, + "logps_train/chosen": -71.8363037109375, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -113.53521728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.133630394935608, + "rewards_train/margins": 1.3698912858963013, + "rewards_train/rejected": -2.503521680831909, + "step": 1063 + }, + { + "epoch": 0.3, + "learning_rate": 1.3228221749660021e-06, + "loss": 0.3037, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -6.522640228271484, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -22.395933151245117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24601402878761292, + "rewards_train/margins": 0.9748292863368988, + "rewards_train/rejected": -1.2208433151245117, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -131.51522827148438, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -208.5790252685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2015228271484375, + "rewards_train/margins": 4.2563796043396, + "rewards_train/rejected": -5.457902431488037, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.713939666748047, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -34.668426513671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0401439666748047, + "rewards_train/margins": -0.2858012914657593, + "rewards_train/rejected": -0.7543426752090454, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -108.04818725585938, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -107.67573547363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7548187375068665, + "rewards_train/margins": 0.1627548336982727, + "rewards_train/rejected": -0.9175735712051392, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -4.2321343421936035, + "logps_train/ref_chosen": -0.50390625, + "logps_train/ref_rejected": -0.50390625, + "logps_train/rejected": -4.246417999267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3728228211402893, + "rewards_train/margins": 0.001428365707397461, + "rewards_train/rejected": -0.37425118684768677, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -15.451032638549805, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -19.319896697998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4263532757759094, + "rewards_train/margins": 0.7368864417076111, + "rewards_train/rejected": -1.1632397174835205, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -87.45343017578125, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -198.25704956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7453430891036987, + "rewards_train/margins": 6.8803616762161255, + "rewards_train/rejected": -8.625704765319824, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -163.8391571044922, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -169.94720458984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.433915615081787, + "rewards_train/margins": -0.08919525146484375, + "rewards_train/rejected": -5.344720363616943, + "step": 1064 + }, + { + "epoch": 0.3, + "logps_train/chosen": -76.59933471679688, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -107.88763427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20993347465991974, + "rewards_train/margins": 0.978830024600029, + "rewards_train/rejected": -1.1887634992599487, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -76.71969604492188, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -106.39740753173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5719696283340454, + "rewards_train/margins": 0.21777111291885376, + "rewards_train/rejected": -0.7897407412528992, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -7.381430625915527, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -20.886194229125977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22251807153224945, + "rewards_train/margins": 0.5411013513803482, + "rewards_train/rejected": -0.7636194229125977, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -172.5836639404297, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -192.80575561523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.208366394042969, + "rewards_train/margins": 2.0222091674804688, + "rewards_train/rejected": -7.2305755615234375, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -113.80730438232422, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -115.68413543701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.080730438232422, + "rewards_train/margins": 1.9876832962036133, + "rewards_train/rejected": -4.068413734436035, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -11.580528259277344, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -12.864474296569824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6799278259277344, + "rewards_train/margins": 0.12214463949203491, + "rewards_train/rejected": -0.8020724654197693, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -83.44422912597656, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -180.61007690429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.29442298412323, + "rewards_train/margins": 4.966584801673889, + "rewards_train/rejected": -6.261007785797119, + "step": 1065 + }, + { + "epoch": 0.3, + "logps_train/chosen": -42.168922424316406, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -34.00440979003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8793922662734985, + "rewards_train/margins": 0.7585488557815552, + "rewards_train/rejected": -2.6379411220550537, + "step": 1065 + }, + { + "epoch": 0.3, + "learning_rate": 1.3203171402723846e-06, + "loss": 0.3916, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -190.83604431152344, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -168.6095733642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.883604526519775, + "rewards_train/margins": 0.02735280990600586, + "rewards_train/rejected": -5.910957336425781, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -24.281713485717773, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -18.207931518554688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4594213962554932, + "rewards_train/margins": -1.038628250360489, + "rewards_train/rejected": -0.4207931458950043, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -70.31265258789062, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -166.81112670898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.9187347292900085, + "rewards_train/margins": 4.399847447872162, + "rewards_train/rejected": -3.4811127185821533, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -69.421875, + "logps_train/ref_chosen": -44.5, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -66.23246002197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4921875, + "rewards_train/margins": 0.5810585021972656, + "rewards_train/rejected": -3.0732460021972656, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -12.32620620727539, + "logps_train/ref_chosen": -6.90625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -37.18394470214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.541995644569397, + "rewards_train/margins": 2.0701488256454468, + "rewards_train/rejected": -2.6121444702148438, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -29.30086326599121, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -50.820552825927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6675863265991211, + "rewards_train/margins": 1.7894690036773682, + "rewards_train/rejected": -2.4570553302764893, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -23.721698760986328, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -11.067300796508789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8159198760986328, + "rewards_train/margins": 0.007997691631317139, + "rewards_train/rejected": -0.82391756772995, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -25.653701782226562, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -24.350791931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.42162024974823, + "rewards_train/margins": 0.23533391952514648, + "rewards_train/rejected": -1.6569541692733765, + "step": 1066 + }, + { + "epoch": 0.3, + "logps_train/chosen": -131.06939697265625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -134.35926818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6069397926330566, + "rewards_train/margins": 0.7289869785308838, + "rewards_train/rejected": -3.3359267711639404, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -13.665168762207031, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -17.621746063232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48526689410209656, + "rewards_train/margins": 1.0362826883792877, + "rewards_train/rejected": -1.5215495824813843, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -152.72865295410156, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -266.5384216308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.772865295410156, + "rewards_train/margins": 7.880976676940918, + "rewards_train/rejected": -12.653841972351074, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -187.16485595703125, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -265.02783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0164856910705566, + "rewards_train/margins": 8.18629789352417, + "rewards_train/rejected": -11.202783584594727, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -168.25538635253906, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -182.66473388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.075538635253906, + "rewards_train/margins": 0.7909350395202637, + "rewards_train/rejected": -7.86647367477417, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -171.4113311767578, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -176.71954345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.44113302230835, + "rewards_train/margins": 0.8808212280273438, + "rewards_train/rejected": -7.321954250335693, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -40.087928771972656, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -31.264373779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7837929129600525, + "rewards_train/margins": 0.6551445126533508, + "rewards_train/rejected": -1.4389374256134033, + "step": 1067 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.920181274414062, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -27.245986938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1482681035995483, + "rewards_train/margins": 0.64508056640625, + "rewards_train/rejected": -1.7933486700057983, + "step": 1067 + }, + { + "epoch": 0.3, + "learning_rate": 1.3178098636983986e-06, + "loss": 0.3925, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -24.933616638183594, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -1.7578125, + "logps_train/rejected": -19.680246353149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5371116399765015, + "rewards_train/margins": 0.25513172149658203, + "rewards_train/rejected": -1.7922433614730835, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -44.72435760498047, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -100.04409790039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.947435736656189, + "rewards_train/margins": 0.156974196434021, + "rewards_train/rejected": -2.10440993309021, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -152.91448974609375, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -166.12948608398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4914491176605225, + "rewards_train/margins": 2.8214995861053467, + "rewards_train/rejected": -6.312948703765869, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -222.39059448242188, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -198.43484497070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.7390594482421875, + "rewards_train/margins": -0.2955746650695801, + "rewards_train/rejected": -6.443484783172607, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -4.646174907684326, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -31.783313751220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09274249523878098, + "rewards_train/margins": 2.1168388798832893, + "rewards_train/rejected": -2.2095813751220703, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -3.3985910415649414, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -89.05657958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06485910713672638, + "rewards_train/margins": 6.440799042582512, + "rewards_train/rejected": -6.505658149719238, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -205.63955688476562, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -214.60675048828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.26395583152771, + "rewards_train/margins": -0.603280782699585, + "rewards_train/rejected": -2.660675048828125, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -2.0250051021575928, + "logps_train/ref_chosen": -1.5, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -18.06548309326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.052500512450933456, + "rewards_train/margins": 1.0477977730333805, + "rewards_train/rejected": -1.100298285484314, + "step": 1068 + }, + { + "epoch": 0.3, + "logps_train/chosen": -163.84451293945312, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -209.30795288085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7344512939453125, + "rewards_train/margins": 4.496344566345215, + "rewards_train/rejected": -8.230795860290527, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -3.2115206718444824, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -19.07231330871582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19537082314491272, + "rewards_train/margins": 1.1181105077266693, + "rewards_train/rejected": -1.313481330871582, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -143.81509399414062, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -109.44400024414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01849060133099556, + "rewards_train/margins": 0.2128906287252903, + "rewards_train/rejected": -0.19440002739429474, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -121.08241271972656, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -66.68924713134766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.808241367340088, + "rewards_train/margins": -1.739316701889038, + "rewards_train/rejected": -3.06892466545105, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.410160064697266, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -24.003650665283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8160160183906555, + "rewards_train/margins": 0.4530990719795227, + "rewards_train/rejected": -1.2691150903701782, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -119.10260009765625, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -215.04661560058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.660260021686554, + "rewards_train/margins": 4.444401443004608, + "rewards_train/rejected": -5.104661464691162, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -172.6523895263672, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -192.1399688720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.71523904800415, + "rewards_train/margins": -0.3012418746948242, + "rewards_train/rejected": -6.413997173309326, + "step": 1069 + }, + { + "epoch": 0.3, + "logps_train/chosen": -142.29893493652344, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -170.8651580810547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.979893445968628, + "rewards_train/margins": -0.09337759017944336, + "rewards_train/rejected": -3.8865158557891846, + "step": 1069 + }, + { + "epoch": 0.3, + "learning_rate": 1.3153003627923216e-06, + "loss": 0.5277, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -5.142787933349609, + "logps_train/ref_chosen": -1.515625, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -24.822704315185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36271628737449646, + "rewards_train/margins": 0.807054191827774, + "rewards_train/rejected": -1.1697704792022705, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.52133560180664, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -19.507984161376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7646335959434509, + "rewards_train/margins": 0.2861648201942444, + "rewards_train/rejected": -1.0507984161376953, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -19.009443283081055, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -35.26218032836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9259443283081055, + "rewards_train/margins": 1.6690237522125244, + "rewards_train/rejected": -2.59496808052063, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -17.461206436157227, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -23.645015716552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4898706376552582, + "rewards_train/margins": 0.1371309459209442, + "rewards_train/rejected": -0.6270015835762024, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -177.87799072265625, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -189.46505737304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1877992153167725, + "rewards_train/margins": -0.44129347801208496, + "rewards_train/rejected": -1.7465057373046875, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -110.62020111083984, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -39.080406188964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2620201110839844, + "rewards_train/margins": -0.5289794206619263, + "rewards_train/rejected": -1.733040690422058, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -71.16445922851562, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -141.37953186035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01644592359662056, + "rewards_train/margins": 3.1715073101222515, + "rewards_train/rejected": -3.187953233718872, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -187.55490112304688, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -283.71807861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.655490398406982, + "rewards_train/margins": 4.516317844390869, + "rewards_train/rejected": -9.171808242797852, + "step": 1070 + }, + { + "epoch": 0.3, + "logps_train/chosen": -230.75242614746094, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -215.74996948242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.675242900848389, + "rewards_train/margins": -0.40024566650390625, + "rewards_train/rejected": -6.274997234344482, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -17.63215446472168, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -32.014976501464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5382154583930969, + "rewards_train/margins": 0.7882822155952454, + "rewards_train/rejected": -1.3264976739883423, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -56.26732635498047, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -140.87680053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023267364129424095, + "rewards_train/margins": 2.6609474178403616, + "rewards_train/rejected": -2.6376800537109375, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -109.42002868652344, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -179.23397827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3420028686523438, + "rewards_train/margins": 5.531394958496094, + "rewards_train/rejected": -6.8733978271484375, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -126.95660400390625, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -190.9438934326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.695660352706909, + "rewards_train/margins": 5.698728799819946, + "rewards_train/rejected": -8.394389152526855, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -58.42518997192383, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -34.34223175048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6925190687179565, + "rewards_train/margins": 0.029204130172729492, + "rewards_train/rejected": -1.721723198890686, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -5.550624847412109, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -20.212268829345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2644374966621399, + "rewards_train/margins": 0.6880394220352173, + "rewards_train/rejected": -0.9524769186973572, + "step": 1071 + }, + { + "epoch": 0.3, + "logps_train/chosen": -10.600457191467285, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -13.993167877197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6756707429885864, + "rewards_train/margins": 0.10489606857299805, + "rewards_train/rejected": -0.7805668115615845, + "step": 1071 + }, + { + "epoch": 0.3, + "learning_rate": 1.312788655117999e-06, + "loss": 0.4261, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -122.0496826171875, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -140.85977172851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.40496826171875, + "rewards_train/margins": 1.2310090065002441, + "rewards_train/rejected": -4.635977268218994, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -108.0030517578125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -215.14964294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.700305163860321, + "rewards_train/margins": 9.264659702777863, + "rewards_train/rejected": -9.964964866638184, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -118.28416442871094, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -138.76881408691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7284164428710938, + "rewards_train/margins": 2.1484649181365967, + "rewards_train/rejected": -3.8768813610076904, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -38.452632904052734, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -126.49752807617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7515132427215576, + "rewards_train/margins": -1.7517604231834412, + "rewards_train/rejected": -0.9997528195381165, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -37.430904388427734, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -32.832611083984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1430904865264893, + "rewards_train/margins": -0.859829306602478, + "rewards_train/rejected": -1.2832611799240112, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -45.095890045166016, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -38.88959884643555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.497089147567749, + "rewards_train/margins": 0.44187068939208984, + "rewards_train/rejected": -2.938959836959839, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -70.8491439819336, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -100.37963104248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9849144220352173, + "rewards_train/margins": 1.453048825263977, + "rewards_train/rejected": -3.4379632472991943, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -90.64856719970703, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -137.16714477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6648567318916321, + "rewards_train/margins": 2.0018576979637146, + "rewards_train/rejected": -2.6667144298553467, + "step": 1072 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.658056259155273, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -25.419885635375977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2095556259155273, + "rewards_train/margins": 0.6136829853057861, + "rewards_train/rejected": -1.8232386112213135, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -130.04840087890625, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -186.34388732910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.054840087890625, + "rewards_train/margins": 2.8795485496520996, + "rewards_train/rejected": -5.934388637542725, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -209.74078369140625, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -219.11932373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.774078369140625, + "rewards_train/margins": 0.5378541946411133, + "rewards_train/rejected": -8.311932563781738, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -16.150062561035156, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -0.9296875, + "logps_train/rejected": -4.092956066131592, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8431312441825867, + "rewards_train/margins": -0.5268043875694275, + "rewards_train/rejected": -0.3163268566131592, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -13.932748794555664, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -21.93158531188965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2932748794555664, + "rewards_train/margins": 1.3936337232589722, + "rewards_train/rejected": -1.6869086027145386, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -134.07620239257812, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -176.41766357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.507620334625244, + "rewards_train/margins": 1.3841462135314941, + "rewards_train/rejected": -6.891766548156738, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -22.85668182373047, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -50.859771728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3481682538986206, + "rewards_train/margins": 0.8628090620040894, + "rewards_train/rejected": -2.21097731590271, + "step": 1073 + }, + { + "epoch": 0.3, + "logps_train/chosen": -167.3203582763672, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -118.73114013671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.982036113739014, + "rewards_train/margins": -1.0089221000671387, + "rewards_train/rejected": -4.973114013671875, + "step": 1073 + }, + { + "epoch": 0.3, + "learning_rate": 1.3102747582547226e-06, + "loss": 0.5234, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -200.8614959716797, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -189.36196899414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.186149597167969, + "rewards_train/margins": -0.4499526023864746, + "rewards_train/rejected": -5.736196994781494, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -40.97979736328125, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -31.311233520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.047979712486267, + "rewards_train/margins": 1.45189368724823, + "rewards_train/rejected": -2.499873399734497, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -27.346473693847656, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -38.76282501220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4096473753452301, + "rewards_train/margins": 2.385385125875473, + "rewards_train/rejected": -2.795032501220703, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.63043785095215, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -11.5335693359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.325543761253357, + "rewards_train/margins": -1.1784368306398392, + "rewards_train/rejected": -0.14710693061351776, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -41.73796463012695, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -91.20218658447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2737964391708374, + "rewards_train/margins": 2.646422266960144, + "rewards_train/rejected": -3.9202187061309814, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -31.620174407958984, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -36.252376556396484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7151424884796143, + "rewards_train/margins": -0.06490468978881836, + "rewards_train/rejected": -2.650237798690796, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -226.60604858398438, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -179.54757690429688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.5606050491333, + "rewards_train/margins": -3.3558473587036133, + "rewards_train/rejected": -5.2047576904296875, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -0.9649052619934082, + "logps_train/ref_chosen": -0.80859375, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -4.73643684387207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01563115231692791, + "rewards_train/margins": 0.17051252909004688, + "rewards_train/rejected": -0.1861436814069748, + "step": 1074 + }, + { + "epoch": 0.3, + "logps_train/chosen": -100.94146728515625, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -199.80722045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9941468238830566, + "rewards_train/margins": 7.336575031280518, + "rewards_train/rejected": -10.330721855163574, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -105.84453582763672, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -115.3736572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6844537258148193, + "rewards_train/margins": 0.902911901473999, + "rewards_train/rejected": -4.587365627288818, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -248.9492950439453, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -291.29058837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.794929504394531, + "rewards_train/margins": 3.8341293334960938, + "rewards_train/rejected": -10.629058837890625, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -27.016067504882812, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -23.666187286376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8516067862510681, + "rewards_train/margins": 0.0025119781494140625, + "rewards_train/rejected": -0.8541187644004822, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -180.0697021484375, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -139.9913787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.806970238685608, + "rewards_train/margins": 2.6421679258346558, + "rewards_train/rejected": -4.449138164520264, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -15.861506462097168, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -28.748889923095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5611506700515747, + "rewards_train/margins": 1.594988465309143, + "rewards_train/rejected": -2.1561391353607178, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -77.3665771484375, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -64.39864349365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.311657667160034, + "rewards_train/margins": 0.27820682525634766, + "rewards_train/rejected": -2.589864492416382, + "step": 1075 + }, + { + "epoch": 0.3, + "logps_train/chosen": -106.96771240234375, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -199.39791870117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2467713356018066, + "rewards_train/margins": 5.443020820617676, + "rewards_train/rejected": -7.689792156219482, + "step": 1075 + }, + { + "epoch": 0.3, + "learning_rate": 1.3077586897971037e-06, + "loss": 0.5849, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -70.9055404663086, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -71.03640747070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7405540347099304, + "rewards_train/margins": 0.01308673620223999, + "rewards_train/rejected": -0.7536407709121704, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -166.2226104736328, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -200.43479919433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7222611904144287, + "rewards_train/margins": 2.1212189197540283, + "rewards_train/rejected": -5.843480110168457, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -32.276912689208984, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -15.623517036437988, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5526913404464722, + "rewards_train/margins": -0.5590896010398865, + "rewards_train/rejected": -0.9936017394065857, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -135.8266143798828, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -189.40020751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.832661509513855, + "rewards_train/margins": 2.90735924243927, + "rewards_train/rejected": -4.740020751953125, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -5.1805500984191895, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -0.55078125, + "logps_train/rejected": -2.8282079696655273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14444498717784882, + "rewards_train/margins": 0.37218765914440155, + "rewards_train/rejected": -0.22774267196655273, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -142.6675262451172, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -192.69302368164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.76675271987915, + "rewards_train/margins": 2.4025497436523438, + "rewards_train/rejected": -7.169302463531494, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.448829650878906, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -14.704316139221191, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44488295912742615, + "rewards_train/margins": 0.6583612263202667, + "rewards_train/rejected": -1.1032441854476929, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -12.566106796264648, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -24.92894172668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48786067962646484, + "rewards_train/margins": 1.2331584692001343, + "rewards_train/rejected": -1.7210191488265991, + "step": 1076 + }, + { + "epoch": 0.3, + "logps_train/chosen": -16.940086364746094, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -121.26470947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9377586245536804, + "rewards_train/margins": 0.4887123703956604, + "rewards_train/rejected": -1.4264709949493408, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -261.8365783691406, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -211.24896240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.1836576461792, + "rewards_train/margins": -0.3587608337402344, + "rewards_train/rejected": -9.824896812438965, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -257.29315185546875, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -208.10281372070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.02931547164917, + "rewards_train/margins": -0.7190341949462891, + "rewards_train/rejected": -6.310281276702881, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -132.27784729003906, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -133.4022979736328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.227784633636475, + "rewards_train/margins": -0.7375545501708984, + "rewards_train/rejected": -4.490230083465576, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -160.87542724609375, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -208.84768676757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.137542724609375, + "rewards_train/margins": 4.097226142883301, + "rewards_train/rejected": -10.234768867492676, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -59.794010162353516, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -56.980247497558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.24559898674488068, + "rewards_train/margins": -0.256376251578331, + "rewards_train/rejected": 0.5019752383232117, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -97.07154083251953, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -145.47262573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7071540355682373, + "rewards_train/margins": 3.69010853767395, + "rewards_train/rejected": -6.3972625732421875, + "step": 1077 + }, + { + "epoch": 0.3, + "logps_train/chosen": -1.1033265590667725, + "logps_train/ref_chosen": -0.84375, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -6.326162338256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.025957657024264336, + "rewards_train/margins": 0.17540857382118702, + "rewards_train/rejected": -0.20136623084545135, + "step": 1077 + }, + { + "epoch": 0.3, + "learning_rate": 1.305240467354955e-06, + "loss": 0.5152, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -133.97528076171875, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -168.28582763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.647528052330017, + "rewards_train/margins": 1.8810547590255737, + "rewards_train/rejected": -3.528582811355591, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -164.42984008789062, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -209.0556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.5429840087890625, + "rewards_train/margins": 2.3625826835632324, + "rewards_train/rejected": -7.905566692352295, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -100.98727416992188, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -140.4178924560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8987274169921875, + "rewards_train/margins": 0.9430618286132812, + "rewards_train/rejected": -2.8417892456054688, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -85.07411193847656, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -177.32797241210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.757411241531372, + "rewards_train/margins": 5.175386190414429, + "rewards_train/rejected": -6.932797431945801, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -335.1452941894531, + "logps_train/ref_chosen": -264.0, + "logps_train/ref_rejected": -220.0, + "logps_train/rejected": -284.1917724609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.114529609680176, + "rewards_train/margins": -0.6953520774841309, + "rewards_train/rejected": -6.419177532196045, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": 0.0, + "logps_train/ref_chosen": 0.0, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -76.19818115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0, + "rewards_train/margins": 2.6198182106018066, + "rewards_train/rejected": -2.6198182106018066, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -67.77162170410156, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -134.85171508789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8021621704101562, + "rewards_train/margins": 5.5330095291137695, + "rewards_train/rejected": -6.335171699523926, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -23.831497192382812, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -33.39665603637695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.873774766921997, + "rewards_train/margins": -0.10910916328430176, + "rewards_train/rejected": -1.7646656036376953, + "step": 1078 + }, + { + "epoch": 0.3, + "logps_train/chosen": -23.107067108154297, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -19.09882354736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5107067227363586, + "rewards_train/margins": 0.8023006319999695, + "rewards_train/rejected": -1.3130073547363281, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -152.38165283203125, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -141.59852600097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.288165330886841, + "rewards_train/margins": 2.421687364578247, + "rewards_train/rejected": -5.709852695465088, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -28.15985870361328, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -20.179948806762695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.515985906124115, + "rewards_train/margins": 0.8332590460777283, + "rewards_train/rejected": -1.3492449522018433, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -86.12269592285156, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -134.3186798095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2622695863246918, + "rewards_train/margins": 3.869598299264908, + "rewards_train/rejected": -4.1318678855896, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -63.203521728515625, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -28.298664093017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6453521847724915, + "rewards_train/margins": 1.8298267722129822, + "rewards_train/rejected": -2.4751789569854736, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -113.20002746582031, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -138.7343292236328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1700026988983154, + "rewards_train/margins": -0.04656982421875, + "rewards_train/rejected": -3.1234328746795654, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -66.5245361328125, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -66.2020263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4274536073207855, + "rewards_train/margins": -0.03225097060203552, + "rewards_train/rejected": -0.39520263671875, + "step": 1079 + }, + { + "epoch": 0.3, + "logps_train/chosen": -320.25579833984375, + "logps_train/ref_chosen": -278.0, + "logps_train/ref_rejected": -278.0, + "logps_train/rejected": -326.8580627441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.225579738616943, + "rewards_train/margins": 0.6602268218994141, + "rewards_train/rejected": -4.885806560516357, + "step": 1079 + }, + { + "epoch": 0.3, + "learning_rate": 1.3027201085531633e-06, + "loss": 0.3324, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -108.33846282958984, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -112.09986877441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8838462829589844, + "rewards_train/margins": 0.2761406898498535, + "rewards_train/rejected": -2.159986972808838, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -119.75299835205078, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -191.60838317871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.775300025939941, + "rewards_train/margins": 4.135538101196289, + "rewards_train/rejected": -8.91083812713623, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -17.761505126953125, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -27.26319694519043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5261505246162415, + "rewards_train/margins": 0.8689191937446594, + "rewards_train/rejected": -1.3950697183609009, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -234.60403442382812, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -243.0, + "logps_train/rejected": -302.86370849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.9604034423828125, + "rewards_train/margins": 0.02596759796142578, + "rewards_train/rejected": -5.986371040344238, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -106.83375549316406, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -107.22579956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7333755493164062, + "rewards_train/margins": 0.03920447826385498, + "rewards_train/rejected": -1.7725800275802612, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -45.80653381347656, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -109.66334533691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2806533575057983, + "rewards_train/margins": 2.035681128501892, + "rewards_train/rejected": -3.3163344860076904, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -154.36376953125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -179.95619201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.936376929283142, + "rewards_train/margins": 3.609242558479309, + "rewards_train/rejected": -5.545619487762451, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -99.03263854980469, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -212.8090057373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3032639026641846, + "rewards_train/margins": 8.72763705253601, + "rewards_train/rejected": -10.030900955200195, + "step": 1080 + }, + { + "epoch": 0.3, + "logps_train/chosen": -33.48933792114258, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -22.869522094726562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8426837921142578, + "rewards_train/margins": -1.0244815945625305, + "rewards_train/rejected": -0.8182021975517273, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -0.6572381854057312, + "logps_train/ref_chosen": -1.1171875, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -7.2234206199646, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04599493369460106, + "rewards_train/margins": 0.33083700761199, + "rewards_train/rejected": -0.2848420739173889, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -122.93971252441406, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -122.13630676269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.743971347808838, + "rewards_train/margins": 0.6696593761444092, + "rewards_train/rejected": -3.413630723953247, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -96.24068450927734, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -64.24722290039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3740684986114502, + "rewards_train/margins": -1.49934621155262, + "rewards_train/rejected": 0.12527771294116974, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -16.07583236694336, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -36.277427673339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11741676181554794, + "rewards_train/margins": 2.43890967220068, + "rewards_train/rejected": -2.321492910385132, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -116.01438903808594, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -97.52532958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1014388799667358, + "rewards_train/margins": 2.0010942220687866, + "rewards_train/rejected": -3.1025331020355225, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -155.67652893066406, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -153.7298126220703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.067652940750122, + "rewards_train/margins": -0.09467166662216187, + "rewards_train/rejected": -0.9729812741279602, + "step": 1081 + }, + { + "epoch": 0.3, + "logps_train/chosen": -121.8951644897461, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -189.93321228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.089516639709473, + "rewards_train/margins": 4.803804397583008, + "rewards_train/rejected": -8.89332103729248, + "step": 1081 + }, + { + "epoch": 0.3, + "learning_rate": 1.3001976310315677e-06, + "loss": 0.4613, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -20.138011932373047, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -15.136794090270996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0638011693954468, + "rewards_train/margins": -0.12199676036834717, + "rewards_train/rejected": -0.9418044090270996, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -212.9530487060547, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -191.89602661132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.395305156707764, + "rewards_train/margins": -1.1057024002075195, + "rewards_train/rejected": -6.289602756500244, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -121.3167724609375, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -242.66310119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.581677198410034, + "rewards_train/margins": 6.784632921218872, + "rewards_train/rejected": -9.366310119628906, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -52.377655029296875, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -140.53939819335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8377655148506165, + "rewards_train/margins": 4.216174304485321, + "rewards_train/rejected": -5.0539398193359375, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -132.24655151367188, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -156.06753540039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0246551036834717, + "rewards_train/margins": 1.7320983409881592, + "rewards_train/rejected": -4.756753444671631, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -16.031322479248047, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -25.79995346069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1718822717666626, + "rewards_train/margins": 0.6893631219863892, + "rewards_train/rejected": -1.8612453937530518, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -15.254889488220215, + "logps_train/ref_chosen": -1.7421875, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -34.08100891113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3512701988220215, + "rewards_train/margins": 0.3318307399749756, + "rewards_train/rejected": -1.683100938796997, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -32.794677734375, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -48.17704772949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.841967761516571, + "rewards_train/margins": 1.2507370114326477, + "rewards_train/rejected": -2.0927047729492188, + "step": 1082 + }, + { + "epoch": 0.3, + "logps_train/chosen": -161.9861602783203, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -174.57546997070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.848616123199463, + "rewards_train/margins": 3.7089314460754395, + "rewards_train/rejected": -8.557547569274902, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -151.79005432128906, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -146.07586669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.029005527496338, + "rewards_train/margins": 0.17858123779296875, + "rewards_train/rejected": -6.207586765289307, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.579750061035156, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -29.16655731201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029524995014071465, + "rewards_train/margins": 1.9586807023733854, + "rewards_train/rejected": -1.929155707359314, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -120.50218200683594, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -126.5717544555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.150218203663826, + "rewards_train/margins": 2.6069572418928146, + "rewards_train/rejected": -2.7571754455566406, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -4.974035263061523, + "logps_train/ref_chosen": -1.515625, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -20.31268310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34584102034568787, + "rewards_train/margins": 0.49167731404304504, + "rewards_train/rejected": -0.8375183343887329, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -90.22466278076172, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -133.34814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17246627807617188, + "rewards_train/margins": 1.9123482704162598, + "rewards_train/rejected": -2.0848145484924316, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -72.2432861328125, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -55.35214614868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5256714224815369, + "rewards_train/margins": 2.5858861804008484, + "rewards_train/rejected": -2.0602147579193115, + "step": 1083 + }, + { + "epoch": 0.3, + "logps_train/chosen": -138.22195434570312, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -89.03822326660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9221954345703125, + "rewards_train/margins": 3.8316268920898438, + "rewards_train/rejected": -4.753822326660156, + "step": 1083 + }, + { + "epoch": 0.3, + "learning_rate": 1.2976730524448367e-06, + "loss": 0.3169, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -187.8860626220703, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -117.41412353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2886064052581787, + "rewards_train/margins": 0.6028060913085938, + "rewards_train/rejected": -3.8914124965667725, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -90.67730712890625, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -126.85335540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.317730665206909, + "rewards_train/margins": 1.0676050186157227, + "rewards_train/rejected": -3.385335683822632, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -34.68037414550781, + "logps_train/ref_chosen": -30.625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -29.591028213500977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4055374264717102, + "rewards_train/margins": 0.5535653829574585, + "rewards_train/rejected": -0.9591028094291687, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -22.765424728393555, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -27.48809051513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7452924847602844, + "rewards_train/margins": 1.0722666382789612, + "rewards_train/rejected": -1.8175591230392456, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -120.88521575927734, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -188.82998657226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5385215878486633, + "rewards_train/margins": 7.1944772601127625, + "rewards_train/rejected": -7.732998847961426, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -105.69379425048828, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -134.80267333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9693794250488281, + "rewards_train/margins": 3.3608880043029785, + "rewards_train/rejected": -4.330267429351807, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -16.194992065429688, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -30.185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6569992303848267, + "rewards_train/margins": 1.3990556001663208, + "rewards_train/rejected": -2.0560548305511475, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -114.59457397460938, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -206.75604248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2094573974609375, + "rewards_train/margins": 7.366147041320801, + "rewards_train/rejected": -8.575604438781738, + "step": 1084 + }, + { + "epoch": 0.3, + "logps_train/chosen": -8.396930694580078, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -1.796875, + "logps_train/rejected": -4.627012252807617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3490680754184723, + "rewards_train/margins": -0.0660543441772461, + "rewards_train/rejected": -0.2830137312412262, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -23.572521209716797, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -0.9140625, + "logps_train/rejected": -16.706403732299805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6322521567344666, + "rewards_train/margins": 0.9469819664955139, + "rewards_train/rejected": -1.5792341232299805, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -81.44429016113281, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -81.9603042602539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5944290161132812, + "rewards_train/margins": 0.051601409912109375, + "rewards_train/rejected": -2.6460304260253906, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -10.539552688598633, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -31.858322143554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3445802628993988, + "rewards_train/margins": 1.6287520229816437, + "rewards_train/rejected": -1.9733322858810425, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -107.55752563476562, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -77.41395568847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2057526111602783, + "rewards_train/margins": 1.110642910003662, + "rewards_train/rejected": -3.3163955211639404, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -19.16791343688965, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -44.38859558105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16679134964942932, + "rewards_train/margins": 1.7970682084560394, + "rewards_train/rejected": -1.9638595581054688, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -82.19207763671875, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -40.34923553466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3192077577114105, + "rewards_train/margins": 2.165715843439102, + "rewards_train/rejected": -2.4849236011505127, + "step": 1085 + }, + { + "epoch": 0.3, + "logps_train/chosen": -67.51840209960938, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -67.21763610839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8018402457237244, + "rewards_train/margins": -0.03007662296295166, + "rewards_train/rejected": -0.7717636227607727, + "step": 1085 + }, + { + "epoch": 0.3, + "learning_rate": 1.2951463904623439e-06, + "loss": 0.3057, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -12.064489364624023, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -20.359556198120117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7783239483833313, + "rewards_train/margins": 0.2388816475868225, + "rewards_train/rejected": -1.0172055959701538, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -15.67024040222168, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -21.44826889038086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02327403984963894, + "rewards_train/margins": 0.35905286110937595, + "rewards_train/rejected": -0.3823269009590149, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -12.56178092956543, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -37.39984893798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7155531048774719, + "rewards_train/margins": 0.9119318127632141, + "rewards_train/rejected": -1.627484917640686, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -91.8362045288086, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -107.37751007080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18362045288085938, + "rewards_train/margins": 1.3541306257247925, + "rewards_train/rejected": -1.5377510786056519, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -98.46510314941406, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -98.25663757324219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7465103268623352, + "rewards_train/margins": -0.020846545696258545, + "rewards_train/rejected": -0.7256637811660767, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -166.54122924804688, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -167.8700714111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7541229724884033, + "rewards_train/margins": 2.532884359359741, + "rewards_train/rejected": -4.2870073318481445, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -156.7605743408203, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -147.42465209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.226057529449463, + "rewards_train/margins": 0.6664078235626221, + "rewards_train/rejected": -3.892465353012085, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.774925231933594, + "logps_train/ref_chosen": -2.578125, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -28.59337615966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6196800470352173, + "rewards_train/margins": 0.6990326642990112, + "rewards_train/rejected": -2.3187127113342285, + "step": 1086 + }, + { + "epoch": 0.3, + "logps_train/chosen": -111.50837707519531, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -199.59799194335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.300837755203247, + "rewards_train/margins": 6.858961820602417, + "rewards_train/rejected": -8.159799575805664, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -159.77511596679688, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -216.15196228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5275115966796875, + "rewards_train/margins": 4.687685012817383, + "rewards_train/rejected": -8.21519660949707, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -33.59290313720703, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -59.2225341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7467902898788452, + "rewards_train/margins": 0.5004631280899048, + "rewards_train/rejected": -2.24725341796875, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -28.728946685791016, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -31.85712432861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8478946685791016, + "rewards_train/margins": 1.369067907333374, + "rewards_train/rejected": -2.2169625759124756, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -13.570327758789062, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -48.01771926879883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13203278183937073, + "rewards_train/margins": 2.182239145040512, + "rewards_train/rejected": -2.314271926879883, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -16.817424774169922, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -15.082664489746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4442424774169922, + "rewards_train/margins": 0.0702739953994751, + "rewards_train/rejected": -0.5145164728164673, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -100.13713836669922, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -34.98713302612305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2137138843536377, + "rewards_train/margins": -0.4400005340576172, + "rewards_train/rejected": -1.7737133502960205, + "step": 1087 + }, + { + "epoch": 0.3, + "logps_train/chosen": -22.15317153930664, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -38.39883804321289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5278171300888062, + "rewards_train/margins": -0.13793325424194336, + "rewards_train/rejected": -1.3898838758468628, + "step": 1087 + }, + { + "epoch": 0.3, + "learning_rate": 1.2926176627680438e-06, + "loss": 0.4034, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -170.69590759277344, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -150.27804565429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.219590663909912, + "rewards_train/margins": 0.05821418762207031, + "rewards_train/rejected": -6.277804851531982, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -79.72521209716797, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -106.88381958007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8225212097167969, + "rewards_train/margins": 1.3658607006072998, + "rewards_train/rejected": -2.1883819103240967, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -117.48770141601562, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -204.62474060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9987701773643494, + "rewards_train/margins": 5.663703978061676, + "rewards_train/rejected": -6.662474155426025, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -114.8042221069336, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -163.23362731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.230422258377075, + "rewards_train/margins": 2.792940378189087, + "rewards_train/rejected": -6.023362636566162, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -140.52655029296875, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -102.5264892578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.502655029296875, + "rewards_train/margins": -2.600006103515625, + "rewards_train/rejected": -0.90264892578125, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -22.104591369628906, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -24.239410400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1729591339826584, + "rewards_train/margins": 1.4134819060564041, + "rewards_train/rejected": -1.5864410400390625, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -111.74537658691406, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -174.18185424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4745376110076904, + "rewards_train/margins": 3.1436479091644287, + "rewards_train/rejected": -5.618185520172119, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -28.67855453491211, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -17.757781982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40535545349121094, + "rewards_train/margins": 1.1360477209091187, + "rewards_train/rejected": -1.5414031744003296, + "step": 1088 + }, + { + "epoch": 0.3, + "logps_train/chosen": -35.670433044433594, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -21.56877326965332, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2295433282852173, + "rewards_train/margins": -0.17266595363616943, + "rewards_train/rejected": -1.0568773746490479, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -18.094501495361328, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -20.113216400146484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5094501376152039, + "rewards_train/margins": -0.016878485679626465, + "rewards_train/rejected": -0.4925716519355774, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -99.12548828125, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -90.40402221679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6625488996505737, + "rewards_train/margins": 0.6778534650802612, + "rewards_train/rejected": -2.340402364730835, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -7.044992446899414, + "logps_train/ref_chosen": -1.5, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -16.64961051940918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5544992685317993, + "rewards_train/margins": 0.8354618549346924, + "rewards_train/rejected": -1.3899611234664917, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -135.49530029296875, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -121.91327667236328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1495301723480225, + "rewards_train/margins": -0.20820236206054688, + "rewards_train/rejected": -2.9413278102874756, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -235.5511474609375, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -235.56808471679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.455114841461182, + "rewards_train/margins": 3.201693534851074, + "rewards_train/rejected": -7.656808376312256, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -200.89552307128906, + "logps_train/ref_chosen": -194.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -296.9105224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6895523071289062, + "rewards_train/margins": 10.701499938964844, + "rewards_train/rejected": -11.39105224609375, + "step": 1089 + }, + { + "epoch": 0.3, + "logps_train/chosen": -11.836381912231445, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -10.223257064819336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9711381793022156, + "rewards_train/margins": -0.5488124787807465, + "rewards_train/rejected": -0.4223257005214691, + "step": 1089 + }, + { + "epoch": 0.3, + "learning_rate": 1.2900868870603502e-06, + "loss": 0.5168, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -93.91767120361328, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -197.1794891357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.941767156124115, + "rewards_train/margins": 7.576181948184967, + "rewards_train/rejected": -8.517949104309082, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -82.04857635498047, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -112.50717163085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2048577070236206, + "rewards_train/margins": 1.595859408378601, + "rewards_train/rejected": -2.8007171154022217, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -3.618319511413574, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -24.40506362915039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14464445412158966, + "rewards_train/margins": 0.9958619326353073, + "rewards_train/rejected": -1.140506386756897, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -209.08370971679688, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -300.18157958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4083709716796875, + "rewards_train/margins": 8.709787368774414, + "rewards_train/rejected": -12.118158340454102, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -100.0869369506836, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -73.24977111816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1086937189102173, + "rewards_train/margins": 0.3662834167480469, + "rewards_train/rejected": -1.4749771356582642, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -24.52783203125, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -61.20894241333008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20278321206569672, + "rewards_train/margins": 3.993111029267311, + "rewards_train/rejected": -4.195894241333008, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -21.913135528564453, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -21.495986938476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6569385528564453, + "rewards_train/margins": -0.9323398470878601, + "rewards_train/rejected": -0.7245987057685852, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -167.9789581298828, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -247.92063903808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.9478960037231445, + "rewards_train/margins": 0.7441678047180176, + "rewards_train/rejected": -5.692063808441162, + "step": 1090 + }, + { + "epoch": 0.3, + "logps_train/chosen": -232.22344970703125, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -257.39959716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.222345352172852, + "rewards_train/margins": -2.1823854446411133, + "rewards_train/rejected": -7.039959907531738, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -10.93253231048584, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -0.76953125, + "logps_train/rejected": -7.300717830657959, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8479407429695129, + "rewards_train/margins": -0.19482207298278809, + "rewards_train/rejected": -0.6531186699867249, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -195.58187866210938, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -168.60202026367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4581878185272217, + "rewards_train/margins": -0.5979857444763184, + "rewards_train/rejected": -2.8602020740509033, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -20.678401947021484, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -28.642072677612305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8131526708602905, + "rewards_train/margins": 0.3666795492172241, + "rewards_train/rejected": -2.1798322200775146, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -186.34786987304688, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -188.49476623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.134787559509277, + "rewards_train/margins": 0.9146890640258789, + "rewards_train/rejected": -9.049476623535156, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -98.0272216796875, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -134.789794921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.60272216796875, + "rewards_train/margins": -1.8237426728010178, + "rewards_train/rejected": 0.22102050483226776, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -11.599695205688477, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -38.065460205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5849695205688477, + "rewards_train/margins": 2.002826452255249, + "rewards_train/rejected": -2.5877959728240967, + "step": 1091 + }, + { + "epoch": 0.3, + "logps_train/chosen": -234.802490234375, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -145.91708374023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.980248928070068, + "rewards_train/margins": -3.588540554046631, + "rewards_train/rejected": -3.3917083740234375, + "step": 1091 + }, + { + "epoch": 0.31, + "learning_rate": 1.2875540810520084e-06, + "loss": 0.8372, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -0.03448392450809479, + "logps_train/ref_chosen": -0.443359375, + "logps_train/ref_rejected": -2.0625, + "logps_train/rejected": -3.655822515487671, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04088754579424858, + "rewards_train/margins": 0.2002198062837124, + "rewards_train/rejected": -0.1593322604894638, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -204.14268493652344, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -208.396240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.114268779754639, + "rewards_train/margins": 0.32535552978515625, + "rewards_train/rejected": -6.439624309539795, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -185.92572021484375, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -182.43304443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.292572021484375, + "rewards_train/margins": 1.3507325649261475, + "rewards_train/rejected": -2.6433045864105225, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -39.40630340576172, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -44.84049606323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.190630316734314, + "rewards_train/margins": 2.0121692419052124, + "rewards_train/rejected": -3.2027995586395264, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -43.93515396118164, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -40.664276123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.593515396118164, + "rewards_train/margins": 0.19791221618652344, + "rewards_train/rejected": -1.7914276123046875, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -129.6642608642578, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -71.36753845214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06642609089612961, + "rewards_train/margins": -0.37967225164175034, + "rewards_train/rejected": 0.3132461607456207, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -28.653770446777344, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -40.845428466796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8528770208358765, + "rewards_train/margins": -0.7183341979980469, + "rewards_train/rejected": -1.1345428228378296, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -22.334571838378906, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -44.98576736450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7897071838378906, + "rewards_train/margins": 0.09636962413787842, + "rewards_train/rejected": -1.886076807975769, + "step": 1092 + }, + { + "epoch": 0.31, + "logps_train/chosen": -225.51449584960938, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -233.6859893798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.751449584960938, + "rewards_train/margins": -0.7828507423400879, + "rewards_train/rejected": -7.96859884262085, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -115.9493408203125, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -165.93402099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.74493408203125, + "rewards_train/margins": 4.398468017578125, + "rewards_train/rejected": -6.143402099609375, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.26831817626953, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -24.005659103393555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.520581841468811, + "rewards_train/margins": 0.004984140396118164, + "rewards_train/rejected": -1.5255659818649292, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -167.90167236328125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -209.45291137695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9901673793792725, + "rewards_train/margins": 2.2551238536834717, + "rewards_train/rejected": -5.245291233062744, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -12.57992935180664, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -19.154916763305664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2579929530620575, + "rewards_train/margins": -0.2550012767314911, + "rewards_train/rejected": -0.0029916763305664062, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -90.77263641357422, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -150.9326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0272636413574219, + "rewards_train/margins": 2.765998125076294, + "rewards_train/rejected": -3.793261766433716, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -99.49105834960938, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -75.23826599121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5491058826446533, + "rewards_train/margins": 0.27472078800201416, + "rewards_train/rejected": -1.8238266706466675, + "step": 1093 + }, + { + "epoch": 0.31, + "logps_train/chosen": -95.4654541015625, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -69.22434997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34654542803764343, + "rewards_train/margins": 0.7508895695209503, + "rewards_train/rejected": -1.0974349975585938, + "step": 1093 + }, + { + "epoch": 0.31, + "learning_rate": 1.285019262469976e-06, + "loss": 0.5353, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -123.19749450683594, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -90.51625061035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9697494506835938, + "rewards_train/margins": -1.5181243419647217, + "rewards_train/rejected": -1.451625108718872, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -142.23434448242188, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -186.5229034423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5234344601631165, + "rewards_train/margins": 0.1288558840751648, + "rewards_train/rejected": -0.6522903442382812, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -108.24949645996094, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -210.76541137695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9749496579170227, + "rewards_train/margins": 5.601591765880585, + "rewards_train/rejected": -6.576541423797607, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -199.19717407226562, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -200.6587677001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.069717407226562, + "rewards_train/margins": 0.0961599349975586, + "rewards_train/rejected": -9.165877342224121, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -10.542652130126953, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -14.898181915283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3073902130126953, + "rewards_train/margins": 0.7136780023574829, + "rewards_train/rejected": -1.0210682153701782, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -12.114124298095703, + "logps_train/ref_chosen": -1.7890625, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -18.535179138183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0325062274932861, + "rewards_train/margins": 0.29288673400878906, + "rewards_train/rejected": -1.3253929615020752, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -4.759297847747803, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -4.8955841064453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14467978477478027, + "rewards_train/margins": -0.053558871150016785, + "rewards_train/rejected": -0.09112091362476349, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -11.880382537841797, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -18.428218841552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8474132418632507, + "rewards_train/margins": 0.12040865421295166, + "rewards_train/rejected": -0.9678218960762024, + "step": 1094 + }, + { + "epoch": 0.31, + "logps_train/chosen": -8.020538330078125, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -7.9764533042907715, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.286428838968277, + "rewards_train/margins": -0.004408508539199829, + "rewards_train/rejected": -0.28202033042907715, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -26.317039489746094, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -40.36152648925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.084829092025757, + "rewards_train/margins": 0.8388235569000244, + "rewards_train/rejected": -2.9236526489257812, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -19.468284606933594, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -73.51863098144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1499534845352173, + "rewards_train/margins": 0.051909685134887695, + "rewards_train/rejected": -1.201863169670105, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -9.202021598815918, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -28.484500885009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06729783862829208, + "rewards_train/margins": 2.1032480224967003, + "rewards_train/rejected": -2.035950183868408, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -166.53811645507812, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -214.4481658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.05381178855896, + "rewards_train/margins": 3.991004705429077, + "rewards_train/rejected": -7.044816493988037, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -126.83689880371094, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -140.4816436767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8336899280548096, + "rewards_train/margins": 2.2144744396209717, + "rewards_train/rejected": -4.048164367675781, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -11.745408058166504, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -26.65462303161621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14954081177711487, + "rewards_train/margins": 1.4284214675426483, + "rewards_train/rejected": -1.5779622793197632, + "step": 1095 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.60892105102539, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -21.6181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.329642117023468, + "rewards_train/margins": 0.8196743130683899, + "rewards_train/rejected": -1.149316430091858, + "step": 1095 + }, + { + "epoch": 0.31, + "learning_rate": 1.2824824490552958e-06, + "loss": 0.4904, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.96931266784668, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -37.7059211730957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7406812906265259, + "rewards_train/margins": 1.529910922050476, + "rewards_train/rejected": -3.270592212677002, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.275299072265625, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -52.12251281738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2337799072265625, + "rewards_train/margins": 1.2784713506698608, + "rewards_train/rejected": -1.5122512578964233, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -35.533203125, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -32.27695846557617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.109570264816284, + "rewards_train/margins": 0.13062572479248047, + "rewards_train/rejected": -2.2401959896087646, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -89.877197265625, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -122.38226318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33771973848342896, + "rewards_train/margins": 4.400506675243378, + "rewards_train/rejected": -4.738226413726807, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -26.63094711303711, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -49.95591735839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8130947351455688, + "rewards_train/margins": 1.5324970483779907, + "rewards_train/rejected": -2.3455917835235596, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -47.756568908691406, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -12.691333770751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3756569027900696, + "rewards_train/margins": 0.3934764862060547, + "rewards_train/rejected": -0.7691333889961243, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -95.69285583496094, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -109.42572021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5692855715751648, + "rewards_train/margins": 0.8732864260673523, + "rewards_train/rejected": -1.442571997642517, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -12.196850776672363, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -18.884685516357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8040601015090942, + "rewards_train/margins": 0.9125335216522217, + "rewards_train/rejected": -1.716593623161316, + "step": 1096 + }, + { + "epoch": 0.31, + "logps_train/chosen": -66.23421478271484, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -30.00926399230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47342148423194885, + "rewards_train/margins": 1.3712548911571503, + "rewards_train/rejected": -1.8446763753890991, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -36.83562469482422, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -12.633560180664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5335625410079956, + "rewards_train/margins": -0.5373939871788025, + "rewards_train/rejected": -0.9961685538291931, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -9.81627368927002, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -16.565208435058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3066273629665375, + "rewards_train/margins": 0.27489349246025085, + "rewards_train/rejected": -0.5815208554267883, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -90.73368835449219, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -244.64132690429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4733688831329346, + "rewards_train/margins": 9.490764379501343, + "rewards_train/rejected": -10.964133262634277, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -73.9534912109375, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -78.96339416503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.04534912109375, + "rewards_train/margins": -0.04900968074798584, + "rewards_train/rejected": -0.9963394403457642, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.266345977783203, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -1.7578125, + "logps_train/rejected": -9.496039390563965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05788460001349449, + "rewards_train/margins": 0.7159381248056889, + "rewards_train/rejected": -0.7738227248191833, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -10.80422592163086, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -29.033140182495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19292259216308594, + "rewards_train/margins": 1.6541414260864258, + "rewards_train/rejected": -1.8470640182495117, + "step": 1097 + }, + { + "epoch": 0.31, + "logps_train/chosen": -153.62672424316406, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -222.57733154296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5126724243164062, + "rewards_train/margins": 5.445060729980469, + "rewards_train/rejected": -8.957733154296875, + "step": 1097 + }, + { + "epoch": 0.31, + "learning_rate": 1.2799436585629715e-06, + "loss": 0.3475, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -165.9452362060547, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -220.9888458251953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.094523906707764, + "rewards_train/margins": -1.2956392765045166, + "rewards_train/rejected": -3.798884630203247, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -118.34156799316406, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -109.16290283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8841569423675537, + "rewards_train/margins": 0.13213324546813965, + "rewards_train/rejected": -4.016290187835693, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -101.19143676757812, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -214.81069946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8191437125205994, + "rewards_train/margins": 9.511926233768463, + "rewards_train/rejected": -10.331069946289062, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.754026412963867, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -27.907812118530273, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6785276532173157, + "rewards_train/margins": -0.17524641752243042, + "rewards_train/rejected": -0.5032812356948853, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -4.469758033752441, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -27.893524169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3780241906642914, + "rewards_train/margins": 2.286126583814621, + "rewards_train/rejected": -1.9081023931503296, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -19.630481719970703, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -27.173992156982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6692981719970703, + "rewards_train/margins": 0.11060106754302979, + "rewards_train/rejected": -1.7798992395401, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -24.71709632873535, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -25.84095001220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9717096090316772, + "rewards_train/margins": 0.2920728921890259, + "rewards_train/rejected": -2.263782501220703, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -30.60244369506836, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -34.77363586425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.525869369506836, + "rewards_train/margins": 0.33899426460266113, + "rewards_train/rejected": -2.864863634109497, + "step": 1098 + }, + { + "epoch": 0.31, + "logps_train/chosen": -192.8294677734375, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -207.72076416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.682946681976318, + "rewards_train/margins": 2.2891297340393066, + "rewards_train/rejected": -7.972076416015625, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -85.47380828857422, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -193.30514526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9973808526992798, + "rewards_train/margins": 5.033133864402771, + "rewards_train/rejected": -6.030514717102051, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -0.9609429836273193, + "logps_train/ref_chosen": -1.3671875, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -4.369717121124268, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04062445089221001, + "rewards_train/margins": 0.3057211749255657, + "rewards_train/rejected": -0.2650967240333557, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -17.731107711791992, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -60.619834899902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8481107950210571, + "rewards_train/margins": 0.863872766494751, + "rewards_train/rejected": -1.711983561515808, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -192.16317749023438, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -134.98231506347656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.216317653656006, + "rewards_train/margins": -2.7680861949920654, + "rewards_train/rejected": -2.4482314586639404, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -198.9600067138672, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -217.3590087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.796000957489014, + "rewards_train/margins": 2.0398998260498047, + "rewards_train/rejected": -7.835900783538818, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -110.26344299316406, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -212.15907287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.426344394683838, + "rewards_train/margins": 6.839562892913818, + "rewards_train/rejected": -9.265907287597656, + "step": 1099 + }, + { + "epoch": 0.31, + "logps_train/chosen": -15.338838577270508, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -35.90785217285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3963838517665863, + "rewards_train/margins": 1.74440136551857, + "rewards_train/rejected": -2.1407852172851562, + "step": 1099 + }, + { + "epoch": 0.31, + "learning_rate": 1.2774029087618445e-06, + "loss": 0.5564, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -98.33880615234375, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -152.4142608642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2338807582855225, + "rewards_train/margins": 1.8575456142425537, + "rewards_train/rejected": -4.091426372528076, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -0.9306836128234863, + "logps_train/ref_chosen": -1.046875, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -3.4391942024230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011619138531386852, + "rewards_train/margins": 0.0508510610088706, + "rewards_train/rejected": -0.03923192247748375, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -24.796823501586914, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -20.48973846435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44218236207962036, + "rewards_train/margins": 0.25679147243499756, + "rewards_train/rejected": -0.6989738345146179, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -47.817691802978516, + "logps_train/ref_chosen": -44.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -93.43148040771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.331769198179245, + "rewards_train/margins": 2.4613788425922394, + "rewards_train/rejected": -2.7931480407714844, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -144.39938354492188, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -131.73312377929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.189938545227051, + "rewards_train/margins": 0.03337383270263672, + "rewards_train/rejected": -4.2233123779296875, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -22.993824005126953, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -25.77798080444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025617599487304688, + "rewards_train/margins": 2.0534157752990723, + "rewards_train/rejected": -2.0277981758117676, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -160.68080139160156, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -117.64842224121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.768080234527588, + "rewards_train/margins": -0.3032379150390625, + "rewards_train/rejected": -2.4648423194885254, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -32.98107147216797, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -58.78794860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.360607147216797, + "rewards_train/margins": 2.111937999725342, + "rewards_train/rejected": -4.472545146942139, + "step": 1100 + }, + { + "epoch": 0.31, + "logps_train/chosen": -45.040679931640625, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -16.028118133544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0790679454803467, + "rewards_train/margins": -0.9637560844421387, + "rewards_train/rejected": -1.115311861038208, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -8.607521057128906, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -22.65485382080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07674789428710938, + "rewards_train/margins": 1.3609832525253296, + "rewards_train/rejected": -1.2842353582382202, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -321.18865966796875, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -223.0, + "logps_train/rejected": -397.0789489746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -14.118865966796875, + "rewards_train/margins": 3.289030075073242, + "rewards_train/rejected": -17.407896041870117, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -195.55487060546875, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -190.8013916015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.455487251281738, + "rewards_train/margins": -1.27534818649292, + "rewards_train/rejected": -4.180139064788818, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -32.26060485839844, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -33.53386306762695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6885604858398438, + "rewards_train/margins": 1.083575963973999, + "rewards_train/rejected": -2.7721364498138428, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -16.01342010498047, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -35.32032775878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9857169985771179, + "rewards_train/margins": 0.9338157773017883, + "rewards_train/rejected": -1.9195327758789062, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -115.08821105957031, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -194.3760986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.358821153640747, + "rewards_train/margins": 1.878788709640503, + "rewards_train/rejected": -5.23760986328125, + "step": 1101 + }, + { + "epoch": 0.31, + "logps_train/chosen": -145.86761474609375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -235.88259887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.586761474609375, + "rewards_train/margins": 9.401498794555664, + "rewards_train/rejected": -10.988260269165039, + "step": 1101 + }, + { + "epoch": 0.31, + "learning_rate": 1.2748602174344702e-06, + "loss": 0.4422, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -37.635528564453125, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -31.356929779052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7510528564453125, + "rewards_train/margins": 0.30964016914367676, + "rewards_train/rejected": -2.0606930255889893, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -183.9107666015625, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -180.36099243164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.091076612472534, + "rewards_train/margins": 1.4450228214263916, + "rewards_train/rejected": -4.536099433898926, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -46.34095001220703, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -19.974031448364258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7715950012207031, + "rewards_train/margins": -0.39294183254241943, + "rewards_train/rejected": -1.3786531686782837, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -125.64573669433594, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -148.41830444335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8145737648010254, + "rewards_train/margins": 3.6272568702697754, + "rewards_train/rejected": -6.441830635070801, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -98.13249206542969, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -122.0704574584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3132492303848267, + "rewards_train/margins": 1.4437965154647827, + "rewards_train/rejected": -2.7570457458496094, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -80.84700012207031, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -119.12889099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6847000122070312, + "rewards_train/margins": 1.5781891345977783, + "rewards_train/rejected": -2.2628891468048096, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -10.488470077514648, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -19.98582649230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6207219958305359, + "rewards_train/margins": 0.777860701084137, + "rewards_train/rejected": -1.3985826969146729, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -260.8440856933594, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -220.14498901367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.484408378601074, + "rewards_train/margins": -3.06990909576416, + "rewards_train/rejected": -8.414499282836914, + "step": 1102 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.1395263671875, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -18.076440811157227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.698327660560608, + "rewards_train/margins": -0.32505857944488525, + "rewards_train/rejected": -1.3732690811157227, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -131.7164764404297, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -127.9990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1716476678848267, + "rewards_train/margins": 0.9282547235488892, + "rewards_train/rejected": -2.099902391433716, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -225.94610595703125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -220.70346069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.094611167907715, + "rewards_train/margins": 1.6757354736328125, + "rewards_train/rejected": -9.770346641540527, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -188.69656372070312, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -287.5064697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2696564197540283, + "rewards_train/margins": 9.480991125106812, + "rewards_train/rejected": -12.75064754486084, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -146.215087890625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -121.96704864501953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.971508979797363, + "rewards_train/margins": -0.22480392456054688, + "rewards_train/rejected": -4.746705055236816, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -123.34117889404297, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -189.74293518066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6841179132461548, + "rewards_train/margins": 6.590175986289978, + "rewards_train/rejected": -8.274293899536133, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -70.04185485839844, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -46.33985900878906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.004185438156128, + "rewards_train/margins": -1.6951994895935059, + "rewards_train/rejected": -1.308985948562622, + "step": 1103 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.686166763305664, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -25.08613395690918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8811167478561401, + "rewards_train/margins": -0.6100033521652222, + "rewards_train/rejected": -1.271113395690918, + "step": 1103 + }, + { + "epoch": 0.31, + "learning_rate": 1.2723156023769925e-06, + "loss": 0.6675, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -123.43798828125, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -132.3687744140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.443798780441284, + "rewards_train/margins": -0.006921291351318359, + "rewards_train/rejected": -2.436877489089966, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -28.28465461730957, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -41.52721405029297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.090965509414673, + "rewards_train/margins": -0.21324408054351807, + "rewards_train/rejected": -1.8777214288711548, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -100.65660095214844, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -130.41720581054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.740660190582275, + "rewards_train/margins": 0.45106029510498047, + "rewards_train/rejected": -5.191720485687256, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.893775939941406, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -33.625022888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5331276059150696, + "rewards_train/margins": 1.2731247544288635, + "rewards_train/rejected": -1.806252360343933, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -127.6930923461914, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -211.90780639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7193092703819275, + "rewards_train/margins": 8.171471178531647, + "rewards_train/rejected": -8.890780448913574, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -82.1512222290039, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -172.49856567382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1151222214102745, + "rewards_train/margins": 5.384734250605106, + "rewards_train/rejected": -5.499856472015381, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -10.082426071166992, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -19.26236343383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5426176190376282, + "rewards_train/margins": 0.802368700504303, + "rewards_train/rejected": -1.3449863195419312, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -5.501452922821045, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -13.661946296691895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3298327922821045, + "rewards_train/margins": 0.708236813545227, + "rewards_train/rejected": -1.0380696058273315, + "step": 1104 + }, + { + "epoch": 0.31, + "logps_train/chosen": -100.3145751953125, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -127.42234802246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.43145751953125, + "rewards_train/margins": 0.9107773303985596, + "rewards_train/rejected": -3.3422348499298096, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -2.9856834411621094, + "logps_train/ref_chosen": -0.2265625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -3.659829616546631, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2759121060371399, + "rewards_train/margins": -0.7224291563034058, + "rewards_train/rejected": 0.44651705026626587, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -27.658470153808594, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -40.59687423706055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.890847086906433, + "rewards_train/margins": -0.6436595916748047, + "rewards_train/rejected": -1.2471874952316284, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.826065063476562, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -20.516233444213867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.145106554031372, + "rewards_train/margins": -0.9434832334518433, + "rewards_train/rejected": -1.2016233205795288, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -21.006824493408203, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -18.20648956298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45068246126174927, + "rewards_train/margins": 1.0590290427207947, + "rewards_train/rejected": -1.509711503982544, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -129.40711975097656, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -125.75888061523438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4907119274139404, + "rewards_train/margins": -0.26482391357421875, + "rewards_train/rejected": -2.2258880138397217, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -17.406862258911133, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -23.138635635375977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3531862199306488, + "rewards_train/margins": 1.5106773674488068, + "rewards_train/rejected": -1.8638635873794556, + "step": 1105 + }, + { + "epoch": 0.31, + "logps_train/chosen": -105.82992553710938, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -184.36102294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0829925537109375, + "rewards_train/margins": 5.553109645843506, + "rewards_train/rejected": -7.636102199554443, + "step": 1105 + }, + { + "epoch": 0.31, + "learning_rate": 1.2697690813990178e-06, + "loss": 0.5093, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -120.91609954833984, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -57.955055236816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5416100025177, + "rewards_train/margins": -0.4461045265197754, + "rewards_train/rejected": -2.095505475997925, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -32.91807174682617, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -53.50753402709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2331928312778473, + "rewards_train/margins": 1.5839462578296661, + "rewards_train/rejected": -1.3507534265518188, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -110.31741333007812, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -119.44194030761719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.181741237640381, + "rewards_train/margins": -0.28754711151123047, + "rewards_train/rejected": -4.89419412612915, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -55.00165557861328, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -42.1219596862793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.875165581703186, + "rewards_train/margins": -0.025469541549682617, + "rewards_train/rejected": -1.8496960401535034, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -52.1482048034668, + "logps_train/ref_chosen": -44.5, + "logps_train/ref_rejected": -29.375, + "logps_train/rejected": -41.28728485107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7648205161094666, + "rewards_train/margins": 0.42640799283981323, + "rewards_train/rejected": -1.1912285089492798, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -18.085695266723633, + "logps_train/ref_chosen": -1.6015625, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -45.22411346435547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6484133005142212, + "rewards_train/margins": -0.3760019540786743, + "rewards_train/rejected": -1.2724113464355469, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -105.47504425048828, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -176.9601287841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9475045204162598, + "rewards_train/margins": 2.1485085487365723, + "rewards_train/rejected": -5.096013069152832, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -32.472862243652344, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -23.238149642944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5347862243652344, + "rewards_train/margins": 0.20777881145477295, + "rewards_train/rejected": -1.7425650358200073, + "step": 1106 + }, + { + "epoch": 0.31, + "logps_train/chosen": -15.500391006469727, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -51.26249694824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43128910660743713, + "rewards_train/margins": 1.7824605405330658, + "rewards_train/rejected": -2.213749647140503, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -19.91484832763672, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -28.142337799072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6977348327636719, + "rewards_train/margins": 1.2289990186691284, + "rewards_train/rejected": -1.9267338514328003, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -3.2929773330688477, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -34.50502014160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026952266693115234, + "rewards_train/margins": 3.164954423904419, + "rewards_train/rejected": -3.1380021572113037, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -14.51636791229248, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -32.057762145996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.879761815071106, + "rewards_train/margins": 1.0697643756866455, + "rewards_train/rejected": -1.9495261907577515, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -42.86635971069336, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -43.01829147338867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.249135971069336, + "rewards_train/margins": -0.09105682373046875, + "rewards_train/rejected": -3.158079147338867, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -207.46151733398438, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -158.12367248535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.096152305603027, + "rewards_train/margins": -2.7337851524353027, + "rewards_train/rejected": -7.362367153167725, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -45.91215515136719, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -55.177459716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8287155628204346, + "rewards_train/margins": 1.3890306949615479, + "rewards_train/rejected": -4.217746257781982, + "step": 1107 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.280731201171875, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -31.486278533935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7499481439590454, + "rewards_train/margins": 1.3986798524856567, + "rewards_train/rejected": -2.148627996444702, + "step": 1107 + }, + { + "epoch": 0.31, + "learning_rate": 1.2672206723234937e-06, + "loss": 0.5947, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -95.86348724365234, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -116.6300048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8363487720489502, + "rewards_train/margins": 1.6766517162322998, + "rewards_train/rejected": -3.51300048828125, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -197.29263305664062, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -212.90008544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.129263401031494, + "rewards_train/margins": 0.9607453346252441, + "rewards_train/rejected": -6.090008735656738, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -40.41785430908203, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -40.412113189697266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6667854189872742, + "rewards_train/margins": -0.0005741119384765625, + "rewards_train/rejected": -0.6662113070487976, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -0.9131802916526794, + "logps_train/ref_chosen": -1.7578125, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -9.454872131347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0844632238149643, + "rewards_train/margins": 0.5049504488706589, + "rewards_train/rejected": -0.4204872250556946, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -131.3180389404297, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -191.8564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2318038940429688, + "rewards_train/margins": 3.253840923309326, + "rewards_train/rejected": -5.485644817352295, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -101.30655670166016, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -106.46528625488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1306557655334473, + "rewards_train/margins": -0.1841270923614502, + "rewards_train/rejected": -2.946528673171997, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -63.97199249267578, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -63.5311164855957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2221992462873459, + "rewards_train/margins": -0.04408758878707886, + "rewards_train/rejected": -0.17811165750026703, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -27.383668899536133, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -13.179322242736816, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2508668899536133, + "rewards_train/margins": -0.6516846418380737, + "rewards_train/rejected": -0.5991822481155396, + "step": 1108 + }, + { + "epoch": 0.31, + "logps_train/chosen": -98.16788482666016, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -119.72384643554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7167884707450867, + "rewards_train/margins": 2.7055962681770325, + "rewards_train/rejected": -3.422384738922119, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -86.10279846191406, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -82.5845947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010279846377670765, + "rewards_train/margins": 0.048179625533521175, + "rewards_train/rejected": -0.05845947191119194, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -21.525358200073242, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -15.719271659851074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2525358200073242, + "rewards_train/margins": 1.035016417503357, + "rewards_train/rejected": -1.2875522375106812, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -184.0562286376953, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -147.23583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.905622959136963, + "rewards_train/margins": 0.46796131134033203, + "rewards_train/rejected": -4.373584270477295, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -27.091829299926758, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -26.998016357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0841829776763916, + "rewards_train/margins": 0.4218686819076538, + "rewards_train/rejected": -1.5060516595840454, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -169.8214569091797, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -185.5401611328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.532145977020264, + "rewards_train/margins": -0.6281299591064453, + "rewards_train/rejected": -5.904016017913818, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -234.52996826171875, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -218.1087188720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.952997207641602, + "rewards_train/margins": -1.8421249389648438, + "rewards_train/rejected": -8.110872268676758, + "step": 1109 + }, + { + "epoch": 0.31, + "logps_train/chosen": -108.68474578857422, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -161.08676147460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.918474555015564, + "rewards_train/margins": 2.840201497077942, + "rewards_train/rejected": -4.758676052093506, + "step": 1109 + }, + { + "epoch": 0.31, + "learning_rate": 1.2646703929865815e-06, + "loss": 0.5879, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -53.2574348449707, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -51.50321960449219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7757434844970703, + "rewards_train/margins": -0.25042152404785156, + "rewards_train/rejected": -2.5253219604492188, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -67.59053802490234, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -122.5836181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2090537548065186, + "rewards_train/margins": 1.0993080139160156, + "rewards_train/rejected": -3.308361768722534, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.17351722717285, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -23.520339965820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4236017167568207, + "rewards_train/margins": 0.7721822559833527, + "rewards_train/rejected": -1.1957839727401733, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -73.9138412475586, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -73.67782592773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19138412177562714, + "rewards_train/margins": -0.023601531982421875, + "rewards_train/rejected": -0.16778258979320526, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -241.48004150390625, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -273.4151916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.74800443649292, + "rewards_train/margins": 4.293514728546143, + "rewards_train/rejected": -10.041519165039062, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.42031478881836, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -53.075504302978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1670315265655518, + "rewards_train/margins": 2.3280189037323, + "rewards_train/rejected": -3.4950504302978516, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -162.03378295898438, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -164.31716918945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9033782482147217, + "rewards_train/margins": 0.7283387184143066, + "rewards_train/rejected": -3.6317169666290283, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -30.495468139648438, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -85.49122619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2995468378067017, + "rewards_train/margins": 1.2245758771896362, + "rewards_train/rejected": -2.524122714996338, + "step": 1110 + }, + { + "epoch": 0.31, + "logps_train/chosen": -14.949371337890625, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -11.600610733032227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7355621457099915, + "rewards_train/margins": 0.011998951435089111, + "rewards_train/rejected": -0.7475610971450806, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -90.33910369873047, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -37.599388122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8839103579521179, + "rewards_train/margins": 1.626028597354889, + "rewards_train/rejected": -2.509938955307007, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -67.23301696777344, + "logps_train/ref_chosen": -44.5, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -45.859344482421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.273301839828491, + "rewards_train/margins": -0.5248674154281616, + "rewards_train/rejected": -1.7484344244003296, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -15.799715995788574, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -13.1121244430542, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1580966711044312, + "rewards_train/margins": -0.20313423871994019, + "rewards_train/rejected": -0.954962432384491, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -200.76406860351562, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -199.555908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8764069080352783, + "rewards_train/margins": 3.1791841983795166, + "rewards_train/rejected": -7.055591106414795, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.281288146972656, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -14.523924827575684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4718788266181946, + "rewards_train/margins": 0.6914511322975159, + "rewards_train/rejected": -1.1633299589157104, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -58.388404846191406, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -86.19122314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1361595243215561, + "rewards_train/margins": 3.155281886458397, + "rewards_train/rejected": -3.019122362136841, + "step": 1111 + }, + { + "epoch": 0.31, + "logps_train/chosen": -1.1414868831634521, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -23.440460205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11085131019353867, + "rewards_train/margins": 1.723647378385067, + "rewards_train/rejected": -1.6127960681915283, + "step": 1111 + }, + { + "epoch": 0.31, + "learning_rate": 1.2621182612375328e-06, + "loss": 0.3916, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -81.00499725341797, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -109.74996185302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30049973726272583, + "rewards_train/margins": 1.1744964718818665, + "rewards_train/rejected": -1.4749962091445923, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -11.66856861114502, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -26.761474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.598106861114502, + "rewards_train/margins": 1.6342906951904297, + "rewards_train/rejected": -2.2323975563049316, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -217.6798858642578, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -222.08030700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.517989158630371, + "rewards_train/margins": 0.5400419235229492, + "rewards_train/rejected": -10.05803108215332, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -172.49566650390625, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -165.07037353515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.399566650390625, + "rewards_train/margins": -1.592529296875, + "rewards_train/rejected": -3.807037353515625, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.846071243286133, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -1.0, + "logps_train/rejected": -17.876832962036133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6064821481704712, + "rewards_train/margins": 1.081201195716858, + "rewards_train/rejected": -1.687683343887329, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -134.2430419921875, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -180.03135681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.324304103851318, + "rewards_train/margins": 0.9788317680358887, + "rewards_train/rejected": -6.303135871887207, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -118.65983581542969, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -118.96188354492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.065983772277832, + "rewards_train/margins": 0.1302046775817871, + "rewards_train/rejected": -4.196188449859619, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -17.06531524658203, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -54.31529235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8940315246582031, + "rewards_train/margins": 2.312497854232788, + "rewards_train/rejected": -3.206529378890991, + "step": 1112 + }, + { + "epoch": 0.31, + "logps_train/chosen": -94.39388275146484, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -146.1385498046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.039388418197632, + "rewards_train/margins": 2.0244667530059814, + "rewards_train/rejected": -4.063855171203613, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -146.89425659179688, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -113.09646606445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9394257068634033, + "rewards_train/margins": 1.9702210426330566, + "rewards_train/rejected": -3.90964674949646, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -72.12540435791016, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -153.8400115966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.887540578842163, + "rewards_train/margins": 1.696460485458374, + "rewards_train/rejected": -4.584001064300537, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -5.2955756187438965, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -7.714446544647217, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24049507081508636, + "rewards_train/margins": 0.09344957768917084, + "rewards_train/rejected": -0.3339446485042572, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -82.00069427490234, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -147.9249725341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24993057548999786, + "rewards_train/margins": 2.8424279242753983, + "rewards_train/rejected": -2.5924973487854004, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -30.61931800842285, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -35.0845832824707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.471306800842285, + "rewards_train/margins": 0.2809016704559326, + "rewards_train/rejected": -2.7522084712982178, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -46.56794738769531, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -69.91304779052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9192947149276733, + "rewards_train/margins": 0.4470101594924927, + "rewards_train/rejected": -2.366304874420166, + "step": 1113 + }, + { + "epoch": 0.31, + "logps_train/chosen": -12.575535774230957, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -20.1260929107666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2888035774230957, + "rewards_train/margins": 0.9050557613372803, + "rewards_train/rejected": -1.193859338760376, + "step": 1113 + }, + { + "epoch": 0.31, + "learning_rate": 1.2595642949385638e-06, + "loss": 0.4092, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -69.96481323242188, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -80.14381408691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5964813232421875, + "rewards_train/margins": 1.4179000854492188, + "rewards_train/rejected": -3.0143814086914062, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -68.16326904296875, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -69.5103530883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1913269758224487, + "rewards_train/margins": 2.259708285331726, + "rewards_train/rejected": -3.451035261154175, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -26.50342559814453, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -18.14542007446289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.594092607498169, + "rewards_train/margins": -0.23892557621002197, + "rewards_train/rejected": -1.355167031288147, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -209.80047607421875, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -240.67984008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.980047702789307, + "rewards_train/margins": 0.5879364013671875, + "rewards_train/rejected": -5.567984104156494, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -298.2468566894531, + "logps_train/ref_chosen": -227.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -216.07838439941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.124685764312744, + "rewards_train/margins": -3.5168473720550537, + "rewards_train/rejected": -3.6078383922576904, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -113.29559326171875, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -143.60165405273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6795593500137329, + "rewards_train/margins": 1.730606198310852, + "rewards_train/rejected": -2.410165548324585, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.428987503051758, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -27.822032928466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01789875142276287, + "rewards_train/margins": 0.03930454142391682, + "rewards_train/rejected": -0.05720329284667969, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -17.71351432800293, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -13.946832656860352, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26510143280029297, + "rewards_train/margins": 0.582706868648529, + "rewards_train/rejected": -0.847808301448822, + "step": 1114 + }, + { + "epoch": 0.31, + "logps_train/chosen": -60.12296676635742, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -41.59770965576172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.212296724319458, + "rewards_train/margins": -1.0525257587432861, + "rewards_train/rejected": -0.15977096557617188, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -40.58757400512695, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -51.6263427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1712573766708374, + "rewards_train/margins": 1.5288769006729126, + "rewards_train/rejected": -2.70013427734375, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -15.727058410644531, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -15.326616287231445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3977058529853821, + "rewards_train/margins": -0.015044212341308594, + "rewards_train/rejected": -0.3826616406440735, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -57.84757995605469, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -52.47854995727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06524200737476349, + "rewards_train/margins": 1.5630970746278763, + "rewards_train/rejected": -1.4978550672531128, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -27.154937744140625, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -13.684235572814941, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5779937505722046, + "rewards_train/margins": -0.5111327171325684, + "rewards_train/rejected": -1.0668610334396362, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -119.52828979492188, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -99.2409439086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.452828884124756, + "rewards_train/margins": 1.2962656021118164, + "rewards_train/rejected": -5.749094486236572, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.62455177307129, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -32.2875862121582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6437051892280579, + "rewards_train/margins": 1.2163034081459045, + "rewards_train/rejected": -1.8600085973739624, + "step": 1115 + }, + { + "epoch": 0.31, + "logps_train/chosen": -264.22003173828125, + "logps_train/ref_chosen": -220.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -187.52516174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.422003269195557, + "rewards_train/margins": 2.5805130004882812, + "rewards_train/rejected": -7.002516269683838, + "step": 1115 + }, + { + "epoch": 0.31, + "learning_rate": 1.2570085119647314e-06, + "loss": 0.6498, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -21.961732864379883, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -21.44458770751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7836732864379883, + "rewards_train/margins": 0.5420354604721069, + "rewards_train/rejected": -1.3257087469100952, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -198.41751098632812, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -190.9362030029297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.641751289367676, + "rewards_train/margins": -0.9481310844421387, + "rewards_train/rejected": -6.693620204925537, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -186.31881713867188, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -175.3485565185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.731881856918335, + "rewards_train/margins": 2.002973794937134, + "rewards_train/rejected": -4.734855651855469, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -87.40499877929688, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -115.3685302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8904999494552612, + "rewards_train/margins": 1.5963531732559204, + "rewards_train/rejected": -3.4868531227111816, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -4.867671012878418, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -1.109375, + "logps_train/rejected": -4.540870189666748, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2742671072483063, + "rewards_train/margins": 0.06888240575790405, + "rewards_train/rejected": -0.3431495130062103, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -39.30310821533203, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -18.75196075439453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2928107976913452, + "rewards_train/margins": -0.6426147222518921, + "rewards_train/rejected": -0.6501960754394531, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -1.4607754945755005, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -21.34540367126465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007796299643814564, + "rewards_train/margins": 0.9954941151663661, + "rewards_train/rejected": -1.0032904148101807, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -193.45571899414062, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -169.5384063720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.645572185516357, + "rewards_train/margins": -0.34173154830932617, + "rewards_train/rejected": -5.303840637207031, + "step": 1116 + }, + { + "epoch": 0.31, + "logps_train/chosen": -60.25865936279297, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -41.595664978027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.300865888595581, + "rewards_train/margins": 1.0212006568908691, + "rewards_train/rejected": -3.32206654548645, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -114.98783874511719, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -148.26824951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8987838625907898, + "rewards_train/margins": 1.2780411839485168, + "rewards_train/rejected": -2.1768250465393066, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -0.5944937467575073, + "logps_train/ref_chosen": -0.8828125, + "logps_train/ref_rejected": -0.7578125, + "logps_train/rejected": -0.7783713936805725, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028831874951720238, + "rewards_train/margins": 0.030887764412909746, + "rewards_train/rejected": -0.0020558894611895084, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -137.7672576904297, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -173.02255249023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.576725959777832, + "rewards_train/margins": 3.8755292892456055, + "rewards_train/rejected": -8.452255249023438, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -14.016701698303223, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -22.409732818603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6016702055931091, + "rewards_train/margins": 0.6018031239509583, + "rewards_train/rejected": -1.2034733295440674, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.129192352294922, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -0.8515625, + "logps_train/rejected": -3.130084753036499, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7879192233085632, + "rewards_train/margins": -0.5600669980049133, + "rewards_train/rejected": -0.2278522253036499, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -243.43861389160156, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -235.45849609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.543861389160156, + "rewards_train/margins": -2.3980116844177246, + "rewards_train/rejected": -7.145849704742432, + "step": 1117 + }, + { + "epoch": 0.31, + "logps_train/chosen": -8.872270584106445, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -11.909226417541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4841020703315735, + "rewards_train/margins": 0.37869560718536377, + "rewards_train/rejected": -0.8627976775169373, + "step": 1117 + }, + { + "epoch": 0.31, + "learning_rate": 1.2544509302038058e-06, + "loss": 0.6668, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -239.42800903320312, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -253.0992431640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.192801475524902, + "rewards_train/margins": -1.282876968383789, + "rewards_train/rejected": -11.909924507141113, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -28.037811279296875, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -43.54962158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6287811994552612, + "rewards_train/margins": 1.0261811017990112, + "rewards_train/rejected": -2.6549623012542725, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -87.30192565917969, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -123.71159362792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.380192518234253, + "rewards_train/margins": 1.7909667491912842, + "rewards_train/rejected": -4.171159267425537, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -88.32569885253906, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -88.3187026977539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.83256995677948, + "rewards_train/margins": -0.0006996393203735352, + "rewards_train/rejected": -1.8318703174591064, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -12.61712646484375, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -42.42361068725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5460876822471619, + "rewards_train/margins": 2.2962735295295715, + "rewards_train/rejected": -2.8423612117767334, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -130.91563415527344, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -136.11019897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6415634155273438, + "rewards_train/margins": 3.469456672668457, + "rewards_train/rejected": -5.111020088195801, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -138.2654571533203, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -199.03704833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.9265456199646, + "rewards_train/margins": 5.727159023284912, + "rewards_train/rejected": -10.653704643249512, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -4.432801246643066, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -1.46875, + "logps_train/rejected": -2.576298713684082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0307801254093647, + "rewards_train/margins": 0.07997474446892738, + "rewards_train/rejected": -0.11075486987829208, + "step": 1118 + }, + { + "epoch": 0.31, + "logps_train/chosen": -83.838134765625, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -134.32681274414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11618652194738388, + "rewards_train/margins": 1.5988677963614464, + "rewards_train/rejected": -1.4826812744140625, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.031974792480469, + "logps_train/ref_chosen": -1.4921875, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -27.576993942260742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.153978705406189, + "rewards_train/margins": 1.2037206888198853, + "rewards_train/rejected": -2.357699394226074, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -176.43453979492188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -233.0, + "logps_train/rejected": -288.0091552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.643454074859619, + "rewards_train/margins": 0.8574614524841309, + "rewards_train/rejected": -5.50091552734375, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -8.550790786743164, + "logps_train/ref_chosen": -1.09375, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -12.441856384277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7457041144371033, + "rewards_train/margins": 0.06723153591156006, + "rewards_train/rejected": -0.8129356503486633, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -4.393929481506348, + "logps_train/ref_chosen": -1.078125, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -32.687095642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3315804600715637, + "rewards_train/margins": 1.6371291279792786, + "rewards_train/rejected": -1.9687095880508423, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -106.19172668457031, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -248.95005798339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.419172763824463, + "rewards_train/margins": 9.675833225250244, + "rewards_train/rejected": -12.095005989074707, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.755636215209961, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -19.029338836669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9927511215209961, + "rewards_train/margins": 0.4945577383041382, + "rewards_train/rejected": -1.4873088598251343, + "step": 1119 + }, + { + "epoch": 0.31, + "logps_train/chosen": -121.85728454589844, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -98.17682647705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5857285261154175, + "rewards_train/margins": 1.181954264640808, + "rewards_train/rejected": -2.7676827907562256, + "step": 1119 + }, + { + "epoch": 0.31, + "learning_rate": 1.2518915675561481e-06, + "loss": 0.3654, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -135.67916870117188, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -123.0521240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.617917060852051, + "rewards_train/margins": -0.3377046585083008, + "rewards_train/rejected": -6.28021240234375, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -219.0694580078125, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -249.491943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.106945991516113, + "rewards_train/margins": 4.042248725891113, + "rewards_train/rejected": -10.149194717407227, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -65.16990661621094, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -147.11585998535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08300933986902237, + "rewards_train/margins": 5.144595243036747, + "rewards_train/rejected": -5.061585903167725, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -150.93467712402344, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -188.826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.393467664718628, + "rewards_train/margins": 5.089149713516235, + "rewards_train/rejected": -8.482617378234863, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -187.01303100585938, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -197.3666229248047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.501303195953369, + "rewards_train/margins": -1.6646409034729004, + "rewards_train/rejected": -1.8366622924804688, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -160.54888916015625, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -183.34815979003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6548889875411987, + "rewards_train/margins": 5.779926896095276, + "rewards_train/rejected": -7.434815883636475, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -101.91191101074219, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -100.8980941772461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1411911249160767, + "rewards_train/margins": -0.10138165950775146, + "rewards_train/rejected": -1.0398094654083252, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -11.565689086914062, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -13.284111022949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4940689206123352, + "rewards_train/margins": 0.02184218168258667, + "rewards_train/rejected": -0.5159111022949219, + "step": 1120 + }, + { + "epoch": 0.31, + "logps_train/chosen": -71.42882537841797, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -220.03880310058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8678825497627258, + "rewards_train/margins": 7.9359981417655945, + "rewards_train/rejected": -8.80388069152832, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -30.344959259033203, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -25.411724090576172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5032459497451782, + "rewards_train/margins": -0.07457351684570312, + "rewards_train/rejected": -1.428672432899475, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -8.12287712097168, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -15.815022468566895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46853771805763245, + "rewards_train/margins": 0.7520270049571991, + "rewards_train/rejected": -1.2205647230148315, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.203432083129883, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -21.593997955322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3140932023525238, + "rewards_train/margins": 0.6578066051006317, + "rewards_train/rejected": -0.9718998074531555, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -53.96691131591797, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -122.55440521240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14669112861156464, + "rewards_train/margins": 0.40874941647052765, + "rewards_train/rejected": -0.5554405450820923, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -132.8123779296875, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -103.54916381835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.08123779296875, + "rewards_train/margins": -0.12632131576538086, + "rewards_train/rejected": -3.954916477203369, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -295.80224609375, + "logps_train/ref_chosen": -192.0, + "logps_train/ref_rejected": -221.0, + "logps_train/rejected": -327.2270812988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.38022518157959, + "rewards_train/margins": 0.24248313903808594, + "rewards_train/rejected": -10.622708320617676, + "step": 1121 + }, + { + "epoch": 0.31, + "logps_train/chosen": -66.54572296142578, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -88.46826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.229572296142578, + "rewards_train/margins": 0.6422538757324219, + "rewards_train/rejected": -3.871826171875, + "step": 1121 + }, + { + "epoch": 0.31, + "learning_rate": 1.2493304419345832e-06, + "loss": 0.4987, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -83.1181411743164, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -120.23185729980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9618141651153564, + "rewards_train/margins": 0.5113716125488281, + "rewards_train/rejected": -2.4731857776641846, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -17.943593978881836, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -27.924842834472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.031859397888183594, + "rewards_train/margins": 1.573124885559082, + "rewards_train/rejected": -1.6049842834472656, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -13.784954071044922, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -14.878434181213379, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9097453951835632, + "rewards_train/margins": -0.4031519889831543, + "rewards_train/rejected": -0.5065934062004089, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.372756958007812, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -19.834312438964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1685256958007812, + "rewards_train/margins": 0.536780595779419, + "rewards_train/rejected": -1.7053062915802002, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -17.21533966064453, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -11.951576232910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9309089779853821, + "rewards_train/margins": -0.2951263189315796, + "rewards_train/rejected": -0.6357826590538025, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -79.34663391113281, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -45.191200256347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.034663438796997, + "rewards_train/margins": 0.30945658683776855, + "rewards_train/rejected": -1.3441200256347656, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -22.55600357055664, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -23.66223907470703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.830600380897522, + "rewards_train/margins": -0.020626425743103027, + "rewards_train/rejected": -1.809973955154419, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -263.13629150390625, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -290.3210144042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.913629531860352, + "rewards_train/margins": 5.118473052978516, + "rewards_train/rejected": -16.032102584838867, + "step": 1122 + }, + { + "epoch": 0.31, + "logps_train/chosen": -11.763666152954102, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -14.241573333740234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5232416391372681, + "rewards_train/margins": -0.5053343046456575, + "rewards_train/rejected": -0.017907334491610527, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -156.1487274169922, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -156.6206817626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.21487283706665, + "rewards_train/margins": 0.0471954345703125, + "rewards_train/rejected": -4.262068271636963, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -88.33595275878906, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -89.73624420166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.38359534740448, + "rewards_train/margins": 1.6400290727615356, + "rewards_train/rejected": -3.0236244201660156, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.727758407592773, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -36.49205780029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4790258407592773, + "rewards_train/margins": 0.19517993927001953, + "rewards_train/rejected": -1.6742057800292969, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -19.11648941040039, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -33.999385833740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9866489768028259, + "rewards_train/margins": 1.1632896065711975, + "rewards_train/rejected": -2.1499385833740234, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -107.21273803710938, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -65.83434295654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2712738513946533, + "rewards_train/margins": 0.4371604919433594, + "rewards_train/rejected": -2.7084343433380127, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -291.47967529296875, + "logps_train/ref_chosen": -213.0, + "logps_train/ref_rejected": -214.0, + "logps_train/rejected": -311.6648864746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.847967624664307, + "rewards_train/margins": 1.9185214042663574, + "rewards_train/rejected": -9.766489028930664, + "step": 1123 + }, + { + "epoch": 0.31, + "logps_train/chosen": -28.56723403930664, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -38.11473846435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3942234516143799, + "rewards_train/margins": 1.1610004901885986, + "rewards_train/rejected": -2.5552239418029785, + "step": 1123 + }, + { + "epoch": 0.31, + "learning_rate": 1.2467675712642752e-06, + "loss": 0.4843, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -12.174541473388672, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -3.46875, + "logps_train/rejected": -22.553447723388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5362041592597961, + "rewards_train/margins": 1.372265636920929, + "rewards_train/rejected": -1.908469796180725, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -163.40342712402344, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -156.4420166015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.440342903137207, + "rewards_train/margins": -0.19614124298095703, + "rewards_train/rejected": -4.24420166015625, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.970561981201172, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -0.228515625, + "logps_train/rejected": -0.19394050538539886, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.465806245803833, + "rewards_train/margins": -1.4692637578118593, + "rewards_train/rejected": 0.0034575120080262423, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -32.54823303222656, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -52.66638946533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9298233389854431, + "rewards_train/margins": 1.51181560754776, + "rewards_train/rejected": -2.441638946533203, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -20.900390625, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -24.219696044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49003908038139343, + "rewards_train/margins": 1.513180524110794, + "rewards_train/rejected": -2.0032196044921875, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -34.96379852294922, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -53.68383026123047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.965129852294922, + "rewards_train/margins": -0.42174673080444336, + "rewards_train/rejected": -2.5433831214904785, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -27.163667678833008, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -20.734811782836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9788667559623718, + "rewards_train/margins": 0.5696144700050354, + "rewards_train/rejected": -1.5484812259674072, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -15.09017562866211, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -23.040008544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41526755690574646, + "rewards_train/margins": 0.11373332142829895, + "rewards_train/rejected": -0.5290008783340454, + "step": 1124 + }, + { + "epoch": 0.31, + "logps_train/chosen": -256.2408752441406, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -172.71762084960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.324087619781494, + "rewards_train/margins": -0.102325439453125, + "rewards_train/rejected": -6.221762180328369, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -29.378406524658203, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -45.69241714477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1378406286239624, + "rewards_train/margins": 1.1314011812210083, + "rewards_train/rejected": -2.2692418098449707, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -205.36105346679688, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -186.99327087402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.486105918884277, + "rewards_train/margins": 0.01322174072265625, + "rewards_train/rejected": -8.499327659606934, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -37.413673400878906, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -81.26887512207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8913673758506775, + "rewards_train/margins": 5.085520327091217, + "rewards_train/rejected": -5.9768877029418945, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -183.74948120117188, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -220.43978881835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.374948263168335, + "rewards_train/margins": 1.3690307140350342, + "rewards_train/rejected": -4.743978977203369, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -135.53695678710938, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -128.47962951660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5036957263946533, + "rewards_train/margins": -0.10573267936706543, + "rewards_train/rejected": -3.397963047027588, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -22.755916595458984, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -27.203441619873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7005916833877563, + "rewards_train/margins": -0.5177475214004517, + "rewards_train/rejected": -1.1828441619873047, + "step": 1125 + }, + { + "epoch": 0.31, + "logps_train/chosen": -162.28228759765625, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -139.11029052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.528228759765625, + "rewards_train/margins": 3.0828003883361816, + "rewards_train/rejected": -4.611029148101807, + "step": 1125 + }, + { + "epoch": 0.31, + "learning_rate": 1.2442029734826012e-06, + "loss": 0.5523, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -3.9012949466705322, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -9.417364120483398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.019245505332946777, + "rewards_train/margins": 0.21723191440105438, + "rewards_train/rejected": -0.1979864090681076, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -8.823629379272461, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -20.738582611083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.019862938672304153, + "rewards_train/margins": 0.4289953224360943, + "rewards_train/rejected": -0.44885826110839844, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -25.34858512878418, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -48.94736099243164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5848585367202759, + "rewards_train/margins": -0.5651223659515381, + "rewards_train/rejected": -1.0197361707687378, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -117.48152160644531, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -152.99444580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6518478393554688, + "rewards_train/margins": 0.15129238367080688, + "rewards_train/rejected": 0.5005554556846619, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -171.85247802734375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -259.76690673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8852479457855225, + "rewards_train/margins": 4.3914430141448975, + "rewards_train/rejected": -7.27669095993042, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -248.853759765625, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -218.0775146484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.8853759765625, + "rewards_train/margins": -1.6776247024536133, + "rewards_train/rejected": -8.207751274108887, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -31.389684677124023, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -33.678104400634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.657718539237976, + "rewards_train/margins": 0.7288419008255005, + "rewards_train/rejected": -2.3865604400634766, + "step": 1126 + }, + { + "epoch": 0.31, + "logps_train/chosen": -78.73397827148438, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -176.631591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.473397970199585, + "rewards_train/margins": 3.0397613048553467, + "rewards_train/rejected": -6.513159275054932, + "step": 1126 + }, + { + "epoch": 0.32, + "logps_train/chosen": -31.747966766357422, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -26.265239715576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.212296724319458, + "rewards_train/margins": 0.5392272472381592, + "rewards_train/rejected": -1.7515239715576172, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -33.880775451660156, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -55.537261962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9505775570869446, + "rewards_train/margins": 1.0281487107276917, + "rewards_train/rejected": -1.9787262678146362, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -47.68952178955078, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -32.20637512207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.981452226638794, + "rewards_train/margins": 0.3516852855682373, + "rewards_train/rejected": -2.3331375122070312, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -10.328243255615234, + "logps_train/ref_chosen": -0.31640625, + "logps_train/ref_rejected": -0.31640625, + "logps_train/rejected": -9.66953182220459, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0011837482452393, + "rewards_train/margins": -0.06587117910385132, + "rewards_train/rejected": -0.9353125691413879, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.997954368591309, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -32.044124603271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5716704726219177, + "rewards_train/margins": 0.8077420592308044, + "rewards_train/rejected": -1.3794125318527222, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -154.99514770507812, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -89.16748046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6995147466659546, + "rewards_train/margins": -0.03276669979095459, + "rewards_train/rejected": -1.666748046875, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -141.2907257080078, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -191.97491455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2290725708007812, + "rewards_train/margins": 4.368419170379639, + "rewards_train/rejected": -5.59749174118042, + "step": 1127 + }, + { + "epoch": 0.32, + "logps_train/chosen": -56.430442810058594, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -70.2984619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1930442899465561, + "rewards_train/margins": 0.5618019253015518, + "rewards_train/rejected": -0.7548462152481079, + "step": 1127 + }, + { + "epoch": 0.32, + "learning_rate": 1.241636666539027e-06, + "loss": 0.5373, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -17.282407760620117, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -76.36408996582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2032407522201538, + "rewards_train/margins": -0.7668317556381226, + "rewards_train/rejected": -0.43640899658203125, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -5.34185791015625, + "logps_train/ref_chosen": -1.828125, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -30.434452056884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3513732850551605, + "rewards_train/margins": 1.8358220160007477, + "rewards_train/rejected": -2.187195301055908, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -5.450482368469238, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -10.565733909606934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11692323535680771, + "rewards_train/margins": 0.17090017348527908, + "rewards_train/rejected": -0.2878234088420868, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -4.49418830871582, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -23.112545013427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11504383385181427, + "rewards_train/margins": 1.546210691332817, + "rewards_train/rejected": -1.6612545251846313, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -25.25377082824707, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -89.8581771850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1535022258758545, + "rewards_train/margins": 2.157315492630005, + "rewards_train/rejected": -4.310817718505859, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -104.0087890625, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -140.0211181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.850878953933716, + "rewards_train/margins": 2.5512330532073975, + "rewards_train/rejected": -5.402112007141113, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -74.19976806640625, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -134.96371459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.069976806640625, + "rewards_train/margins": 1.2763946056365967, + "rewards_train/rejected": -3.3463714122772217, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -89.62850952148438, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -188.44146728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1128509044647217, + "rewards_train/margins": 2.4312961101531982, + "rewards_train/rejected": -4.54414701461792, + "step": 1128 + }, + { + "epoch": 0.32, + "logps_train/chosen": -18.6307430267334, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -14.820714950561523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5661993026733398, + "rewards_train/margins": -1.177877813577652, + "rewards_train/rejected": -0.38832148909568787, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -157.41940307617188, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -201.20062255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7419402599334717, + "rewards_train/margins": 1.9781219959259033, + "rewards_train/rejected": -5.720062255859375, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -27.55126953125, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -2.203125, + "logps_train/rejected": -20.942363739013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.280126929283142, + "rewards_train/margins": 0.593796968460083, + "rewards_train/rejected": -1.873923897743225, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -81.39260864257812, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -77.31939697265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06073913723230362, + "rewards_train/margins": -0.10732116550207138, + "rewards_train/rejected": 0.168060302734375, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -215.73892211914062, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -246.310791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.273892402648926, + "rewards_train/margins": 1.9571866989135742, + "rewards_train/rejected": -9.2310791015625, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -42.05050277709961, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -53.25661087036133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0675503015518188, + "rewards_train/margins": 0.8331108093261719, + "rewards_train/rejected": -1.9006611108779907, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -115.57552337646484, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -129.66510009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.657552480697632, + "rewards_train/margins": 1.058957815170288, + "rewards_train/rejected": -4.71651029586792, + "step": 1129 + }, + { + "epoch": 0.32, + "logps_train/chosen": -153.346923828125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -213.02560424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.534692406654358, + "rewards_train/margins": 2.967868208885193, + "rewards_train/rejected": -4.502560615539551, + "step": 1129 + }, + { + "epoch": 0.32, + "learning_rate": 1.23906866839498e-06, + "loss": 0.3898, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -26.702960968017578, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -38.34599685668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3702961206436157, + "rewards_train/margins": 0.6018035411834717, + "rewards_train/rejected": -1.9720996618270874, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.477222442626953, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -1.890625, + "logps_train/rejected": -5.089971542358398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.32584723830223083, + "rewards_train/margins": -0.005912572145462036, + "rewards_train/rejected": -0.3199346661567688, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -132.7491455078125, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -191.8387908935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.024914741516113, + "rewards_train/margins": 3.6089649200439453, + "rewards_train/rejected": -9.633879661560059, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -2.7561280727386475, + "logps_train/ref_chosen": -0.71875, + "logps_train/ref_rejected": -1.6953125, + "logps_train/rejected": -7.645590305328369, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20373781025409698, + "rewards_train/margins": 0.39128999412059784, + "rewards_train/rejected": -0.5950278043746948, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -123.614013671875, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -219.14376831054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7614014148712158, + "rewards_train/margins": 7.552975416183472, + "rewards_train/rejected": -9.314376831054688, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -16.03500747680664, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -26.374908447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.147250771522522, + "rewards_train/margins": 0.3839900493621826, + "rewards_train/rejected": -1.5312408208847046, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -21.37091064453125, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -30.509212493896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.718341052532196, + "rewards_train/margins": 0.9700801968574524, + "rewards_train/rejected": -1.6884212493896484, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -164.57887268066406, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -183.1681671142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.757887363433838, + "rewards_train/margins": 2.058929443359375, + "rewards_train/rejected": -4.816816806793213, + "step": 1130 + }, + { + "epoch": 0.32, + "logps_train/chosen": -165.83558654785156, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -216.2371368408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2835586071014404, + "rewards_train/margins": 5.240155458450317, + "rewards_train/rejected": -8.523714065551758, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -14.295048713684082, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -26.579429626464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3732548654079437, + "rewards_train/margins": 0.6909381449222565, + "rewards_train/rejected": -1.0641930103302002, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -94.10011291503906, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -94.9688720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6600112915039062, + "rewards_train/margins": 1.6118760108947754, + "rewards_train/rejected": -4.271887302398682, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -190.3727264404297, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -201.4622344970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.637272834777832, + "rewards_train/margins": 0.7089505195617676, + "rewards_train/rejected": -5.3462233543396, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -86.05865478515625, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -106.97592163085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.105865478515625, + "rewards_train/margins": 0.54172682762146, + "rewards_train/rejected": -2.647592306137085, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -30.840789794921875, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -30.631284713745117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.705954074859619, + "rewards_train/margins": 0.09311199188232422, + "rewards_train/rejected": -2.7990660667419434, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -158.39645385742188, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -185.4469451904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.239645957946777, + "rewards_train/margins": 0.8550491333007812, + "rewards_train/rejected": -9.094695091247559, + "step": 1131 + }, + { + "epoch": 0.32, + "logps_train/chosen": -118.09566497802734, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -180.80186462402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.50956654548645, + "rewards_train/margins": 4.370620012283325, + "rewards_train/rejected": -7.880186557769775, + "step": 1131 + }, + { + "epoch": 0.32, + "learning_rate": 1.2364989970237248e-06, + "loss": 0.319, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -18.929115295410156, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -38.50510025024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7929115295410156, + "rewards_train/margins": 0.5700985193252563, + "rewards_train/rejected": -1.363010048866272, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -53.89195251464844, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -31.724483489990234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9141952991485596, + "rewards_train/margins": -0.3292468786239624, + "rewards_train/rejected": -1.5849484205245972, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -20.70418357849121, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -23.627391815185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7579183578491211, + "rewards_train/margins": 0.9579458236694336, + "rewards_train/rejected": -1.7158641815185547, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.096624374389648, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -35.8649787902832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6159124374389648, + "rewards_train/margins": 2.051835536956787, + "rewards_train/rejected": -2.667747974395752, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -101.54712677001953, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -34.389984130859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5047128200531006, + "rewards_train/margins": -0.47196435928344727, + "rewards_train/rejected": -2.0327484607696533, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -35.51069641113281, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -29.54285430908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1698195934295654, + "rewards_train/margins": 0.09071588516235352, + "rewards_train/rejected": -2.260535478591919, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -108.56673431396484, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -132.1204833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5066734552383423, + "rewards_train/margins": 2.455374836921692, + "rewards_train/rejected": -2.962048292160034, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -64.72927856445312, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -84.72222900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12292785942554474, + "rewards_train/margins": 1.0992950648069382, + "rewards_train/rejected": -1.222222924232483, + "step": 1132 + }, + { + "epoch": 0.32, + "logps_train/chosen": -4.705487251281738, + "logps_train/ref_chosen": -1.8359375, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -17.117280960083008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28695496916770935, + "rewards_train/margins": 0.5497731268405914, + "rewards_train/rejected": -0.8367280960083008, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -28.48764991760254, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -36.19692611694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011264991946518421, + "rewards_train/margins": 0.9834276316687465, + "rewards_train/rejected": -0.9946926236152649, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -119.46772766113281, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -116.75242614746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3467729091644287, + "rewards_train/margins": 0.2784698009490967, + "rewards_train/rejected": -2.6252427101135254, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -18.039474487304688, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -54.335289001464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2039474248886108, + "rewards_train/margins": -0.2204185128211975, + "rewards_train/rejected": -0.9835289120674133, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -33.7970085144043, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -43.112892150878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9547008275985718, + "rewards_train/margins": 1.7003384828567505, + "rewards_train/rejected": -3.6550393104553223, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -111.99945068359375, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -102.9000244140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4499452114105225, + "rewards_train/margins": -0.25994277000427246, + "rewards_train/rejected": -3.19000244140625, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -106.66582489013672, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -183.1905517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.916582465171814, + "rewards_train/margins": 1.4024728536605835, + "rewards_train/rejected": -3.3190553188323975, + "step": 1133 + }, + { + "epoch": 0.32, + "logps_train/chosen": -13.132835388183594, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -0.11279296875, + "logps_train/rejected": -13.136911392211914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.052346110343933, + "rewards_train/margins": 0.25006580352783203, + "rewards_train/rejected": -1.3024119138717651, + "step": 1133 + }, + { + "epoch": 0.32, + "learning_rate": 1.2339276704102362e-06, + "loss": 0.4799, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -273.4810485839844, + "logps_train/ref_chosen": -207.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -234.85269165039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.648105144500732, + "rewards_train/margins": -0.9628357887268066, + "rewards_train/rejected": -5.685269355773926, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -178.28271484375, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -220.2104949951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.428271532058716, + "rewards_train/margins": 4.992778539657593, + "rewards_train/rejected": -8.421050071716309, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -88.95732116699219, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -153.77206420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2957321405410767, + "rewards_train/margins": 1.8314744234085083, + "rewards_train/rejected": -3.127206563949585, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -207.23109436035156, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -128.9170684814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.423109531402588, + "rewards_train/margins": 0.5685973167419434, + "rewards_train/rejected": -2.9917068481445312, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -89.26581573486328, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -57.709712982177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.426581621170044, + "rewards_train/margins": 1.0193896293640137, + "rewards_train/rejected": -2.4459712505340576, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -75.2639389038086, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -74.20779418945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8263939023017883, + "rewards_train/margins": -0.10561448335647583, + "rewards_train/rejected": -0.7207794189453125, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -10.010605812072754, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -2.5, + "logps_train/rejected": -5.318347454071045, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21668557822704315, + "rewards_train/margins": 0.06514917314052582, + "rewards_train/rejected": -0.28183475136756897, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -29.2572021484375, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -26.759807586669922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.813220202922821, + "rewards_train/margins": -0.3497394323348999, + "rewards_train/rejected": -0.46348077058792114, + "step": 1134 + }, + { + "epoch": 0.32, + "logps_train/chosen": -130.56448364257812, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -254.53579711914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7564483880996704, + "rewards_train/margins": 8.597131133079529, + "rewards_train/rejected": -10.3535795211792, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -51.00520324707031, + "logps_train/ref_chosen": -43.75, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -32.62531280517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7255203127861023, + "rewards_train/margins": 0.9620110392570496, + "rewards_train/rejected": -1.6875313520431519, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -229.70436096191406, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -168.21331787109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.2704362869262695, + "rewards_train/margins": -1.099104404449463, + "rewards_train/rejected": -5.171331882476807, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -7.257147789001465, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -18.73680305480957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1944647878408432, + "rewards_train/margins": 1.1260905414819717, + "rewards_train/rejected": -1.320555329322815, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -15.076637268066406, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -29.56524658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2764137387275696, + "rewards_train/margins": 1.4426109194755554, + "rewards_train/rejected": -1.719024658203125, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -200.2861328125, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -147.1733856201172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.528613090515137, + "rewards_train/margins": -3.6612744331359863, + "rewards_train/rejected": -4.86733865737915, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -72.14369201660156, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -177.4593505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8393692374229431, + "rewards_train/margins": 7.456566393375397, + "rewards_train/rejected": -8.29593563079834, + "step": 1135 + }, + { + "epoch": 0.32, + "logps_train/chosen": -149.98426818847656, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -216.14398193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.798427104949951, + "rewards_train/margins": 4.5659708976745605, + "rewards_train/rejected": -9.364398002624512, + "step": 1135 + }, + { + "epoch": 0.32, + "learning_rate": 1.2313547065510743e-06, + "loss": 0.6494, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -254.64181518554688, + "logps_train/ref_chosen": -190.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -205.4012451171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.464181423187256, + "rewards_train/margins": -0.6240568161010742, + "rewards_train/rejected": -5.840124607086182, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -111.29986572265625, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -62.71278762817383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4799865782260895, + "rewards_train/margins": -0.03370779752731323, + "rewards_train/rejected": -0.44627878069877625, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -146.4644317626953, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -218.7729949951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.246443271636963, + "rewards_train/margins": 2.130856513977051, + "rewards_train/rejected": -6.377299785614014, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -40.9173469543457, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -68.24437713623047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1354846954345703, + "rewards_train/margins": -1.8860470056533813, + "rewards_train/rejected": -1.249437689781189, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -14.605447769165039, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -22.410104751586914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6230447888374329, + "rewards_train/margins": 1.2570281624794006, + "rewards_train/rejected": -1.8800729513168335, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -27.01660919189453, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -37.25, + "logps_train/rejected": -91.98812866210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.55166095495224, + "rewards_train/margins": 4.922152101993561, + "rewards_train/rejected": -5.473813056945801, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -33.3370361328125, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -21.12416648864746, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1587036848068237, + "rewards_train/margins": 0.28808796405792236, + "rewards_train/rejected": -1.446791648864746, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -24.79981231689453, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -16.319063186645508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3549813032150269, + "rewards_train/margins": -0.2730749845504761, + "rewards_train/rejected": -1.0819063186645508, + "step": 1136 + }, + { + "epoch": 0.32, + "logps_train/chosen": -108.10797119140625, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -202.95166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5607971549034119, + "rewards_train/margins": 7.984368860721588, + "rewards_train/rejected": -8.545166015625, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -110.24192810058594, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -135.6178436279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.124192953109741, + "rewards_train/margins": 4.7875916957855225, + "rewards_train/rejected": -6.911784648895264, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -130.89907836914062, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -261.28173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3899078369140625, + "rewards_train/margins": 8.738265991210938, + "rewards_train/rejected": -9.128173828125, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -12.640583992004395, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -26.969863891601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7515584230422974, + "rewards_train/margins": 1.4798029661178589, + "rewards_train/rejected": -2.2313613891601562, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -89.16889953613281, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -160.1704864501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1168899536132812, + "rewards_train/margins": 5.000158786773682, + "rewards_train/rejected": -7.117048740386963, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -139.36769104003906, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -178.63937377929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9367691278457642, + "rewards_train/margins": 2.6271685361862183, + "rewards_train/rejected": -4.563937664031982, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -31.66567039489746, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -32.59674835205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3290669918060303, + "rewards_train/margins": -0.11314201354980469, + "rewards_train/rejected": -2.2159249782562256, + "step": 1137 + }, + { + "epoch": 0.32, + "logps_train/chosen": -7.34119987487793, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -12.514739990234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3981824815273285, + "rewards_train/margins": -0.040458470582962036, + "rewards_train/rejected": -0.35772401094436646, + "step": 1137 + }, + { + "epoch": 0.32, + "learning_rate": 1.2287801234542583e-06, + "loss": 0.4574, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -139.11972045898438, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -126.73265075683594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.611972332000732, + "rewards_train/margins": -1.1887073516845703, + "rewards_train/rejected": -4.423264980316162, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -71.60067749023438, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -42.22132110595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9600677490234375, + "rewards_train/margins": 1.3495643138885498, + "rewards_train/rejected": -2.3096320629119873, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -143.341064453125, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -134.417236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.884106397628784, + "rewards_train/margins": 2.607617139816284, + "rewards_train/rejected": -5.491723537445068, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -115.62211608886719, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -231.17474365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9122115969657898, + "rewards_train/margins": 10.405262768268585, + "rewards_train/rejected": -11.317474365234375, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -43.79942321777344, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -19.382640838623047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.642442464828491, + "rewards_train/margins": -1.2666783332824707, + "rewards_train/rejected": -1.3757641315460205, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -27.70005226135254, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -30.625, + "logps_train/rejected": -48.306427001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.082505226135254, + "rewards_train/margins": 0.6856374740600586, + "rewards_train/rejected": -1.7681427001953125, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -7.747430801391602, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -33.24717330932617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3981805741786957, + "rewards_train/margins": 1.6265367567539215, + "rewards_train/rejected": -2.024717330932617, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -182.546875, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -168.02813720703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.0546875, + "rewards_train/margins": -0.05187368392944336, + "rewards_train/rejected": -5.002813816070557, + "step": 1138 + }, + { + "epoch": 0.32, + "logps_train/chosen": -80.9014663696289, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -86.9189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0401467084884644, + "rewards_train/margins": 2.501747965812683, + "rewards_train/rejected": -3.5418946743011475, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.029938697814941, + "logps_train/ref_chosen": -1.4296875, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -17.21453857421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6600251197814941, + "rewards_train/margins": -0.14482122659683228, + "rewards_train/rejected": -0.5152038931846619, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -46.086456298828125, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -29.07040786743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7586456537246704, + "rewards_train/margins": 0.7671451568603516, + "rewards_train/rejected": -1.525790810585022, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -17.23773765563965, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -16.965877532958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5800237655639648, + "rewards_train/margins": 0.5071890354156494, + "rewards_train/rejected": -1.0872128009796143, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -88.85847473144531, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -109.81228637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4358474910259247, + "rewards_train/margins": 3.9453814327716827, + "rewards_train/rejected": -4.381228923797607, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -119.68536376953125, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -196.67630004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3185365200042725, + "rewards_train/margins": 5.04909348487854, + "rewards_train/rejected": -7.3676300048828125, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -74.89226531982422, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -94.41482543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.939226508140564, + "rewards_train/margins": 0.45225608348846436, + "rewards_train/rejected": -2.3914825916290283, + "step": 1139 + }, + { + "epoch": 0.32, + "logps_train/chosen": -97.36874389648438, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -190.40032958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5368744134902954, + "rewards_train/margins": 8.05315911769867, + "rewards_train/rejected": -8.590033531188965, + "step": 1139 + }, + { + "epoch": 0.32, + "learning_rate": 1.2262039391391404e-06, + "loss": 0.4245, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -22.05786895751953, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -25.394742965698242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7714118957519531, + "rewards_train/margins": -0.006937623023986816, + "rewards_train/rejected": -1.7644742727279663, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -91.02304077148438, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -204.93992614746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8023041486740112, + "rewards_train/margins": 5.091688752174377, + "rewards_train/rejected": -6.893992900848389, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -40.08851623535156, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -46.86437225341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7088516354560852, + "rewards_train/margins": 1.5775856375694275, + "rewards_train/rejected": -2.2864372730255127, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -2.340336799621582, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -31.842327117919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23471632599830627, + "rewards_train/margins": 1.9064491093158722, + "rewards_train/rejected": -1.671732783317566, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -22.293556213378906, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -102.0456314086914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2668555974960327, + "rewards_train/margins": -0.8122924566268921, + "rewards_train/rejected": -0.4545631408691406, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -33.805015563964844, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -35.14080047607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25550156831741333, + "rewards_train/margins": 2.9038910269737244, + "rewards_train/rejected": -3.1593925952911377, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -166.01797485351562, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -209.31077575683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.30179762840271, + "rewards_train/margins": 5.329280138015747, + "rewards_train/rejected": -7.631077766418457, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -142.68499755859375, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -194.76669311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.868499755859375, + "rewards_train/margins": 2.408169746398926, + "rewards_train/rejected": -4.276669502258301, + "step": 1140 + }, + { + "epoch": 0.32, + "logps_train/chosen": -79.81610870361328, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -176.81564331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3816108703613281, + "rewards_train/margins": 4.399953365325928, + "rewards_train/rejected": -5.781564235687256, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.120543479919434, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -17.30657386779785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1401793509721756, + "rewards_train/margins": 0.8217280358076096, + "rewards_train/rejected": -0.9619073867797852, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -134.83224487304688, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -161.34796142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0832245349884033, + "rewards_train/margins": 3.40157151222229, + "rewards_train/rejected": -4.484796047210693, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -25.470909118652344, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -37.63745880126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2845909595489502, + "rewards_train/margins": 1.366654872894287, + "rewards_train/rejected": -2.6512458324432373, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -9.33485221862793, + "logps_train/ref_chosen": -2.078125, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -24.138961791992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.725672721862793, + "rewards_train/margins": 1.3210361003875732, + "rewards_train/rejected": -2.046708822250366, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -133.63046264648438, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -226.1840362548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.413046360015869, + "rewards_train/margins": 8.555357456207275, + "rewards_train/rejected": -10.968403816223145, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -113.19127655029297, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -114.89130401611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.880872368812561, + "rewards_train/margins": 0.37000274658203125, + "rewards_train/rejected": 0.5108696222305298, + "step": 1141 + }, + { + "epoch": 0.32, + "logps_train/chosen": -32.77909851074219, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -0.78125, + "logps_train/rejected": -19.747041702270508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0779098272323608, + "rewards_train/margins": 0.818669319152832, + "rewards_train/rejected": -1.8965791463851929, + "step": 1141 + }, + { + "epoch": 0.32, + "learning_rate": 1.2236261716362789e-06, + "loss": 0.2578, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -219.841552734375, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -258.04248046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.184155464172363, + "rewards_train/margins": -0.17990684509277344, + "rewards_train/rejected": -9.00424861907959, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -144.85015869140625, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -127.92005920410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1350159645080566, + "rewards_train/margins": -0.2430100440979004, + "rewards_train/rejected": -2.8920059204101562, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -214.40982055664062, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -231.50137329101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.340982437133789, + "rewards_train/margins": -0.39084482192993164, + "rewards_train/rejected": -7.950137615203857, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -3.5816612243652344, + "logps_train/ref_chosen": -1.3984375, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -18.84545135498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21832238137722015, + "rewards_train/margins": 0.40372277796268463, + "rewards_train/rejected": -0.6220451593399048, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -242.0531463623047, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -307.1295166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.305314540863037, + "rewards_train/margins": 7.5076375007629395, + "rewards_train/rejected": -12.812952041625977, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.876705169677734, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -16.388072967529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5498294830322266, + "rewards_train/margins": 1.9011367559432983, + "rewards_train/rejected": -1.3513072729110718, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.876521110534668, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -10.637907981872559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5907770991325378, + "rewards_train/margins": 0.09801369905471802, + "rewards_train/rejected": -0.6887907981872559, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -26.421905517578125, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -33.476234436035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5546905994415283, + "rewards_train/margins": 0.655432939529419, + "rewards_train/rejected": -2.2101235389709473, + "step": 1142 + }, + { + "epoch": 0.32, + "logps_train/chosen": -121.99433898925781, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -184.09130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.399433970451355, + "rewards_train/margins": 2.8096967935562134, + "rewards_train/rejected": -4.209130764007568, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -216.75685119628906, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -208.1432647705078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.675685405731201, + "rewards_train/margins": -0.7613587379455566, + "rewards_train/rejected": -4.9143266677856445, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -13.700054168701172, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -27.63884735107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004994583316147327, + "rewards_train/margins": 2.2438792707398534, + "rewards_train/rejected": -2.238884687423706, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -125.12494659423828, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -155.6569061279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4624946117401123, + "rewards_train/margins": 0.0031960010528564453, + "rewards_train/rejected": -2.4656906127929688, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -3.879096031188965, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -1.3125, + "logps_train/rejected": -1.1899930238723755, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1238471046090126, + "rewards_train/margins": -0.1360978027805686, + "rewards_train/rejected": 0.012250698171555996, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -52.906715393066406, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -15.858902931213379, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6031715869903564, + "rewards_train/margins": -1.3469687700271606, + "rewards_train/rejected": -1.2562028169631958, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -153.06686401367188, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -117.55909729003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5066864490509033, + "rewards_train/margins": -1.500776767730713, + "rewards_train/rejected": -2.0059096813201904, + "step": 1143 + }, + { + "epoch": 0.32, + "logps_train/chosen": -105.42887878417969, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -211.8856658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.942888021469116, + "rewards_train/margins": 6.245679140090942, + "rewards_train/rejected": -10.188567161560059, + "step": 1143 + }, + { + "epoch": 0.32, + "learning_rate": 1.2210468389873136e-06, + "loss": 0.6419, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -113.19038391113281, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -128.15487670898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.019038438796997, + "rewards_train/margins": 0.39644932746887207, + "rewards_train/rejected": -2.415487766265869, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -22.536306381225586, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -35.40660095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9911306500434875, + "rewards_train/margins": 0.9807794690132141, + "rewards_train/rejected": -1.9719101190567017, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -83.92479705810547, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -128.30690002441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8924797773361206, + "rewards_train/margins": 1.0882102251052856, + "rewards_train/rejected": -2.9806900024414062, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -53.17353820800781, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -36.4083137512207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7423538565635681, + "rewards_train/margins": 0.885977566242218, + "rewards_train/rejected": -1.6283314228057861, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -109.51588439941406, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -171.61483764648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5015885829925537, + "rewards_train/margins": 4.859895467758179, + "rewards_train/rejected": -7.361484050750732, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -114.74443054199219, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -137.96229553222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1744431257247925, + "rewards_train/margins": 0.2717864513397217, + "rewards_train/rejected": -1.4462295770645142, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -11.175585746765137, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -23.33723258972168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29880857467651367, + "rewards_train/margins": 1.0911647081375122, + "rewards_train/rejected": -1.3899732828140259, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -10.414912223815918, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -6.326146125793457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.035241223871707916, + "rewards_train/margins": -0.0557516124099493, + "rewards_train/rejected": 0.020510388538241386, + "step": 1144 + }, + { + "epoch": 0.32, + "logps_train/chosen": -103.59053039550781, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -103.50302124023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3590530157089233, + "rewards_train/margins": -0.00875091552734375, + "rewards_train/rejected": -1.3503021001815796, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -0.8901805877685547, + "logps_train/ref_chosen": -0.419921875, + "logps_train/ref_rejected": -0.419921875, + "logps_train/rejected": -0.9200904369354248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04702587053179741, + "rewards_train/margins": 0.002990987151861191, + "rewards_train/rejected": -0.0500168576836586, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -39.80254364013672, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -34.831390380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2927544116973877, + "rewards_train/margins": 0.4403846263885498, + "rewards_train/rejected": -1.7331390380859375, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -76.72560119628906, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -64.86321258544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.047560214996338, + "rewards_train/margins": 1.8637611865997314, + "rewards_train/rejected": -3.9113214015960693, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -135.11669921875, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -116.51973724365234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5616700649261475, + "rewards_train/margins": -0.3596963882446289, + "rewards_train/rejected": -2.2019736766815186, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -144.5468292236328, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -244.12335205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.1546831130981445, + "rewards_train/margins": 5.85765266418457, + "rewards_train/rejected": -10.012335777282715, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -110.34354400634766, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -26.62201690673828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2343544960021973, + "rewards_train/margins": -0.5721527338027954, + "rewards_train/rejected": -1.6622017621994019, + "step": 1145 + }, + { + "epoch": 0.32, + "logps_train/chosen": -165.93905639648438, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -143.1368408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.493905782699585, + "rewards_train/margins": 0.0697784423828125, + "rewards_train/rejected": -2.5636842250823975, + "step": 1145 + }, + { + "epoch": 0.32, + "learning_rate": 1.2184659592448384e-06, + "loss": 0.4784, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.582215309143066, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -12.564701080322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02302846871316433, + "rewards_train/margins": 0.7451236005872488, + "rewards_train/rejected": -0.7220951318740845, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -112.52870178222656, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -194.57485961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5028703212738037, + "rewards_train/margins": 6.054616212844849, + "rewards_train/rejected": -8.557486534118652, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -82.53935241699219, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -91.31755065917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8039352297782898, + "rewards_train/margins": 0.12781983613967896, + "rewards_train/rejected": -0.9317550659179688, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -158.98275756835938, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -200.0170440673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9982757568359375, + "rewards_train/margins": 3.0034289360046387, + "rewards_train/rejected": -6.001704692840576, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -22.961793899536133, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -15.907790184020996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.761804461479187, + "rewards_train/margins": -0.7335253953933716, + "rewards_train/rejected": -1.0282790660858154, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -11.400123596191406, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -20.994287490844727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14626236259937286, + "rewards_train/margins": 0.6281664222478867, + "rewards_train/rejected": -0.7744287848472595, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -232.84217834472656, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -229.8135223388672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.884218215942383, + "rewards_train/margins": -0.4028654098510742, + "rewards_train/rejected": -9.481352806091309, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -145.9784698486328, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -110.03193664550781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.247847080230713, + "rewards_train/margins": -2.094653367996216, + "rewards_train/rejected": -3.153193712234497, + "step": 1146 + }, + { + "epoch": 0.32, + "logps_train/chosen": -101.19905090332031, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -101.03877258300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2699050903320312, + "rewards_train/margins": 1.2839722633361816, + "rewards_train/rejected": -3.553877353668213, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -102.44209289550781, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -34.75876235961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.144209384918213, + "rewards_train/margins": 0.28166699409484863, + "rewards_train/rejected": -2.4258763790130615, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.815104484558105, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -19.26776695251465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17213545739650726, + "rewards_train/margins": 0.9358912855386734, + "rewards_train/rejected": -1.1080267429351807, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -85.58028411865234, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -167.2283935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.908028483390808, + "rewards_train/margins": 3.814810872077942, + "rewards_train/rejected": -5.72283935546875, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -75.0624008178711, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -193.12059020996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.356240153312683, + "rewards_train/margins": 6.605818867683411, + "rewards_train/rejected": -7.962059020996094, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -10.700141906738281, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -27.274869918823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5543891787528992, + "rewards_train/margins": 1.060597836971283, + "rewards_train/rejected": -1.6149870157241821, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -14.403266906738281, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -27.586015701293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3153266906738281, + "rewards_train/margins": 1.2182749509811401, + "rewards_train/rejected": -1.5336016416549683, + "step": 1147 + }, + { + "epoch": 0.32, + "logps_train/chosen": -90.64048767089844, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -83.41211700439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7640488147735596, + "rewards_train/margins": 0.8021628856658936, + "rewards_train/rejected": -2.566211700439453, + "step": 1147 + }, + { + "epoch": 0.32, + "learning_rate": 1.215883550472275e-06, + "loss": 0.4898, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -145.96005249023438, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -248.93914794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.696005344390869, + "rewards_train/margins": 5.797909259796143, + "rewards_train/rejected": -10.493914604187012, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -11.455401420593262, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -10.643649101257324, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46429014205932617, + "rewards_train/margins": 0.19382476806640625, + "rewards_train/rejected": -0.6581149101257324, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -79.2708969116211, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -108.04782104492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.477089762687683, + "rewards_train/margins": 1.4776924848556519, + "rewards_train/rejected": -2.954782247543335, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -30.029830932617188, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -16.236608505249023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6279830932617188, + "rewards_train/margins": 0.5581778287887573, + "rewards_train/rejected": -1.186160922050476, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -2.6705689430236816, + "logps_train/ref_chosen": -0.81640625, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -35.704261779785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18541626632213593, + "rewards_train/margins": 1.1600099354982376, + "rewards_train/rejected": -1.3454262018203735, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -20.051795959472656, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -83.67437744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030179595574736595, + "rewards_train/margins": 1.2372582200914621, + "rewards_train/rejected": -1.2674378156661987, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -154.85972595214844, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -201.72906494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.035972595214844, + "rewards_train/margins": 2.436933994293213, + "rewards_train/rejected": -6.472906589508057, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -135.59326171875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -163.6197052001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.559326171875, + "rewards_train/margins": 0.40264463424682617, + "rewards_train/rejected": -4.961970806121826, + "step": 1148 + }, + { + "epoch": 0.32, + "logps_train/chosen": -124.27364349365234, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -173.29666137695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.327364444732666, + "rewards_train/margins": 5.2523016929626465, + "rewards_train/rejected": -7.5796661376953125, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.20996379852295, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -1.4921875, + "logps_train/rejected": -8.890216827392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0352536216378212, + "rewards_train/margins": 0.7750565782189369, + "rewards_train/rejected": -0.7398029565811157, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -49.608489990234375, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -50.1534423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18915100395679474, + "rewards_train/margins": 4.52324552834034, + "rewards_train/rejected": -4.334094524383545, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -114.44364166259766, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -137.56546020507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.194364309310913, + "rewards_train/margins": -0.8878182172775269, + "rewards_train/rejected": -1.3065460920333862, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -18.152849197387695, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -21.675020217895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8152849078178406, + "rewards_train/margins": 0.8459671139717102, + "rewards_train/rejected": -1.6612520217895508, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -223.8914794921875, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -185.8406982421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.489148139953613, + "rewards_train/margins": -1.405078411102295, + "rewards_train/rejected": -4.084069728851318, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.537307739257812, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -8.811729431152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02873077429831028, + "rewards_train/margins": 0.3024421688169241, + "rewards_train/rejected": -0.3311729431152344, + "step": 1149 + }, + { + "epoch": 0.32, + "logps_train/chosen": -317.8609313964844, + "logps_train/ref_chosen": -194.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -281.565185546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.386093139648438, + "rewards_train/margins": -0.9295740127563477, + "rewards_train/rejected": -11.45651912689209, + "step": 1149 + }, + { + "epoch": 0.32, + "learning_rate": 1.2132996307437468e-06, + "loss": 0.488, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -77.80612182617188, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -156.51388549804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8056122064590454, + "rewards_train/margins": 5.095776438713074, + "rewards_train/rejected": -6.901388645172119, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -11.87724494934082, + "logps_train/ref_chosen": -1.6171875, + "logps_train/ref_rejected": -1.6171875, + "logps_train/rejected": -12.373869895935059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.026005744934082, + "rewards_train/margins": 0.04966247081756592, + "rewards_train/rejected": -1.075668215751648, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -177.09864807128906, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -198.0313262939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8098647594451904, + "rewards_train/margins": 1.5932681560516357, + "rewards_train/rejected": -5.403132915496826, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -126.9002685546875, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -153.82302856445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.190026760101318, + "rewards_train/margins": 2.342276096343994, + "rewards_train/rejected": -6.5323028564453125, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -110.71492767333984, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -293.7216491699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9714927673339844, + "rewards_train/margins": 6.500672340393066, + "rewards_train/rejected": -9.47216510772705, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -159.8919677734375, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -191.24899291992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8891968727111816, + "rewards_train/margins": 5.035702705383301, + "rewards_train/rejected": -7.924899578094482, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -27.87813377380371, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -34.806636810302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6440633535385132, + "rewards_train/margins": 1.039725422859192, + "rewards_train/rejected": -2.683788776397705, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -13.984588623046875, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -17.39320945739746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9234588742256165, + "rewards_train/margins": -0.2028878927230835, + "rewards_train/rejected": -0.720570981502533, + "step": 1150 + }, + { + "epoch": 0.32, + "logps_train/chosen": -198.31817626953125, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -211.79412841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.931817531585693, + "rewards_train/margins": 0.84759521484375, + "rewards_train/rejected": -7.779412746429443, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -181.56423950195312, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -270.55218505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9564239978790283, + "rewards_train/margins": 8.198795080184937, + "rewards_train/rejected": -11.155219078063965, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -28.90040397644043, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -18.048072814941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1275404691696167, + "rewards_train/margins": -0.007108211517333984, + "rewards_train/rejected": -1.1204322576522827, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -163.01820373535156, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -189.84332275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.50182044506073, + "rewards_train/margins": 4.182511925697327, + "rewards_train/rejected": -5.684332370758057, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -29.54796028137207, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -32.19876480102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.629796028137207, + "rewards_train/margins": 0.6588304042816162, + "rewards_train/rejected": -2.2886264324188232, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -170.85345458984375, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -248.31373596191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.635345458984375, + "rewards_train/margins": 6.396028518676758, + "rewards_train/rejected": -11.031373977661133, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -26.67142677307129, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -17.76396369934082, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.460892677307129, + "rewards_train/margins": -0.07980883121490479, + "rewards_train/rejected": -1.3810838460922241, + "step": 1151 + }, + { + "epoch": 0.32, + "logps_train/chosen": -117.30304718017578, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -262.0623779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.030304908752441, + "rewards_train/margins": 9.175932884216309, + "rewards_train/rejected": -13.20623779296875, + "step": 1151 + }, + { + "epoch": 0.32, + "learning_rate": 1.2107142181439524e-06, + "loss": 0.2678, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -16.656103134155273, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -32.0457878112793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5343603491783142, + "rewards_train/margins": 1.1827184557914734, + "rewards_train/rejected": -1.7170788049697876, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -10.285919189453125, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -19.255142211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6629669070243835, + "rewards_train/margins": 0.3625473380088806, + "rewards_train/rejected": -1.0255142450332642, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -146.552490234375, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -228.0318145751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.805248975753784, + "rewards_train/margins": 5.397932291030884, + "rewards_train/rejected": -9.203181266784668, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -98.51313018798828, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -61.08320617675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.551313042640686, + "rewards_train/margins": 0.7820075750350952, + "rewards_train/rejected": -2.3333206176757812, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -180.19798278808594, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -183.86219787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7197983264923096, + "rewards_train/margins": 1.7664215564727783, + "rewards_train/rejected": -5.486219882965088, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -249.72561645507812, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -264.00408935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.472561836242676, + "rewards_train/margins": 1.3278474807739258, + "rewards_train/rejected": -11.800409317016602, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -97.47709655761719, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -97.66742706298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19770966470241547, + "rewards_train/margins": 0.019033044576644897, + "rewards_train/rejected": -0.21674270927906036, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -2.4348931312561035, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -22.95731544494629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07473931461572647, + "rewards_train/margins": 2.066304825246334, + "rewards_train/rejected": -2.1410441398620605, + "step": 1152 + }, + { + "epoch": 0.32, + "logps_train/chosen": -136.84100341796875, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -142.15635681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1341004371643066, + "rewards_train/margins": 0.7815353870391846, + "rewards_train/rejected": -3.915635824203491, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -202.94398498535156, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -210.0, + "logps_train/rejected": -286.10430908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.794398784637451, + "rewards_train/margins": 0.8160324096679688, + "rewards_train/rejected": -7.61043119430542, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -191.10179138183594, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -263.5471496582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.160179138183594, + "rewards_train/margins": 2.394536018371582, + "rewards_train/rejected": -11.554715156555176, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -23.188905715942383, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -58.4363899230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7313905954360962, + "rewards_train/margins": 4.280998587608337, + "rewards_train/rejected": -5.012389183044434, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -18.70209312438965, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -35.43782043457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2827093303203583, + "rewards_train/margins": 2.242322713136673, + "rewards_train/rejected": -2.5250320434570312, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -12.5106201171875, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -36.06599426269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02393798902630806, + "rewards_train/margins": 2.1242874152958393, + "rewards_train/rejected": -2.1003494262695312, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -92.17379760742188, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -132.3582000732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.917379856109619, + "rewards_train/margins": 0.8684401512145996, + "rewards_train/rejected": -3.7858200073242188, + "step": 1153 + }, + { + "epoch": 0.32, + "logps_train/chosen": -145.2723388671875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -186.16964721679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.22723388671875, + "rewards_train/margins": 2.389730930328369, + "rewards_train/rejected": -3.616964817047119, + "step": 1153 + }, + { + "epoch": 0.32, + "learning_rate": 1.2081273307680388e-06, + "loss": 0.2418, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -3.8123836517333984, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -43.92926025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.059363365173339844, + "rewards_train/margins": 3.7241876125335693, + "rewards_train/rejected": -3.783550977706909, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -26.50179672241211, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -41.500892639160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2689297199249268, + "rewards_train/margins": 1.399909496307373, + "rewards_train/rejected": -2.6688392162323, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -15.381707191467285, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -1.328125, + "logps_train/rejected": -3.5322265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2459832429885864, + "rewards_train/margins": -1.0255730897188187, + "rewards_train/rejected": -0.22041015326976776, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -28.00017547607422, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -25.238496780395508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1750175952911377, + "rewards_train/margins": 0.8988320827484131, + "rewards_train/rejected": -2.073849678039551, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -19.947782516479492, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -28.066287994384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2541532516479492, + "rewards_train/margins": 0.37122559547424316, + "rewards_train/rejected": -1.6253788471221924, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -144.62814331054688, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -171.84385681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7128143310546875, + "rewards_train/margins": 5.321571350097656, + "rewards_train/rejected": -7.034385681152344, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -164.53732299804688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -240.94342041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3537323474884033, + "rewards_train/margins": 5.140609502792358, + "rewards_train/rejected": -8.494341850280762, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -116.77620697021484, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -134.07257080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.427620768547058, + "rewards_train/margins": 3.02963650226593, + "rewards_train/rejected": -4.457257270812988, + "step": 1154 + }, + { + "epoch": 0.32, + "logps_train/chosen": -98.71636199951172, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -54.383697509765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4716362953186035, + "rewards_train/margins": -1.2582664489746094, + "rewards_train/rejected": -2.213369846343994, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -174.4517822265625, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -237.2510986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.095178127288818, + "rewards_train/margins": 1.429931640625, + "rewards_train/rejected": -6.525109767913818, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -169.317138671875, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -126.72627258300781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.081714153289795, + "rewards_train/margins": -2.809086799621582, + "rewards_train/rejected": -2.272627353668213, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -76.40032196044922, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -75.78306579589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4400322139263153, + "rewards_train/margins": 0.0382743775844574, + "rewards_train/rejected": -0.4783065915107727, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -111.42090606689453, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -174.7056121826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5920906066894531, + "rewards_train/margins": 2.5784707069396973, + "rewards_train/rejected": -3.1705613136291504, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -175.78627014160156, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -176.22023010253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.378627061843872, + "rewards_train/margins": -0.7566039562225342, + "rewards_train/rejected": -2.622023105621338, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -32.75127029418945, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -30.287487030029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8626270294189453, + "rewards_train/margins": 0.8348716497421265, + "rewards_train/rejected": -1.6974986791610718, + "step": 1155 + }, + { + "epoch": 0.32, + "logps_train/chosen": -128.43626403808594, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -173.7027587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3436264991760254, + "rewards_train/margins": 3.976649284362793, + "rewards_train/rejected": -6.320275783538818, + "step": 1155 + }, + { + "epoch": 0.32, + "learning_rate": 1.205538986721475e-06, + "loss": 0.5849, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -11.925593376159668, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -13.087992668151855, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43318435549736023, + "rewards_train/margins": 0.16623994708061218, + "rewards_train/rejected": -0.5994243025779724, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -5.976380825042725, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -8.568039894104004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3007631003856659, + "rewards_train/margins": 0.006040900945663452, + "rewards_train/rejected": -0.30680400133132935, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -139.76626586914062, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -274.01348876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4266265630722046, + "rewards_train/margins": 10.874722123146057, + "rewards_train/rejected": -12.301348686218262, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -1.4286003112792969, + "logps_train/ref_chosen": -0.94140625, + "logps_train/ref_rejected": -0.94140625, + "logps_train/rejected": -1.5276867151260376, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04871940612792969, + "rewards_train/margins": 0.009908642619848251, + "rewards_train/rejected": -0.05862804874777794, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -243.7493896484375, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -221.90859985351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.674939155578613, + "rewards_train/margins": 0.9159212112426758, + "rewards_train/rejected": -9.590860366821289, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -131.61630249023438, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -207.31646728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7116302847862244, + "rewards_train/margins": 7.820016443729401, + "rewards_train/rejected": -8.531646728515625, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -69.99954986572266, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -131.0101318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3499549925327301, + "rewards_train/margins": 2.9010582864284515, + "rewards_train/rejected": -3.2510132789611816, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -42.168025970458984, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -32.96678924560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4668025970458984, + "rewards_train/margins": 0.7923762798309326, + "rewards_train/rejected": -2.259178876876831, + "step": 1156 + }, + { + "epoch": 0.32, + "logps_train/chosen": -18.174701690673828, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -16.282318115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19247017800807953, + "rewards_train/margins": 0.12951163947582245, + "rewards_train/rejected": -0.321981817483902, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -19.860679626464844, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -26.352861404418945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.273568034172058, + "rewards_train/margins": 0.26796817779541016, + "rewards_train/rejected": -1.5415362119674683, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -14.847679138183594, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -31.531387329101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8035179376602173, + "rewards_train/margins": 1.2871209383010864, + "rewards_train/rejected": -2.0906388759613037, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -27.455795288085938, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -30.060853958129883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2268295288085938, + "rewards_train/margins": 0.07925593852996826, + "rewards_train/rejected": -1.306085467338562, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -2.420747756958008, + "logps_train/ref_chosen": -0.99609375, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -68.87063598632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14246539771556854, + "rewards_train/margins": 0.1945982128381729, + "rewards_train/rejected": -0.33706361055374146, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -20.928510665893555, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -37.985443115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7616010904312134, + "rewards_train/margins": 2.1619433164596558, + "rewards_train/rejected": -2.923544406890869, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -8.159395217895508, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -26.39723014831543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5065645575523376, + "rewards_train/margins": 1.614408552646637, + "rewards_train/rejected": -2.1209731101989746, + "step": 1157 + }, + { + "epoch": 0.32, + "logps_train/chosen": -109.49126434326172, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -189.4493408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4491264820098877, + "rewards_train/margins": 1.5958077907562256, + "rewards_train/rejected": -4.044934272766113, + "step": 1157 + }, + { + "epoch": 0.32, + "learning_rate": 1.2029492041199252e-06, + "loss": 0.3705, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -6.669106483459473, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -18.934860229492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4059731662273407, + "rewards_train/margins": 1.1703254282474518, + "rewards_train/rejected": -1.5762985944747925, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -39.43752670288086, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -132.53375244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.18750262260437, + "rewards_train/margins": 1.1658728122711182, + "rewards_train/rejected": -4.353375434875488, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -115.58956146240234, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -64.297119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.158956289291382, + "rewards_train/margins": 0.7832555770874023, + "rewards_train/rejected": -3.942211866378784, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -12.858043670654297, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -5.5099945068359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.46080437302589417, + "rewards_train/margins": -0.5910549312829971, + "rewards_train/rejected": 0.13025055825710297, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -5.6267499923706055, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -14.66349983215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11580000072717667, + "rewards_train/margins": 0.2755500003695488, + "rewards_train/rejected": -0.39135000109672546, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -9.243866920471191, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -31.480266571044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.608761727809906, + "rewards_train/margins": 0.5767650008201599, + "rewards_train/rejected": -1.185526728630066, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -137.94912719726562, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -182.60617065429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9449126720428467, + "rewards_train/margins": 3.4157044887542725, + "rewards_train/rejected": -6.360617160797119, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -89.32078552246094, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -200.18496704101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0820785760879517, + "rewards_train/margins": 7.636418700218201, + "rewards_train/rejected": -8.718497276306152, + "step": 1158 + }, + { + "epoch": 0.32, + "logps_train/chosen": -32.53436279296875, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -50.42388916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.490936279296875, + "rewards_train/margins": 0.7014526128768921, + "rewards_train/rejected": -1.192388892173767, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -26.43693733215332, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -37.50242233276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2093188762664795, + "rewards_train/margins": 1.0627985000610352, + "rewards_train/rejected": -3.2721173763275146, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -27.384723663330078, + "logps_train/ref_chosen": -27.25, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -40.251686096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013472366146743298, + "rewards_train/margins": 2.0491961957886815, + "rewards_train/rejected": -2.062668561935425, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -31.469947814941406, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -58.63700485229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1344947814941406, + "rewards_train/margins": 2.1917057037353516, + "rewards_train/rejected": -3.326200485229492, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -295.1358947753906, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -272.516845703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -15.1135892868042, + "rewards_train/margins": -0.9619045257568359, + "rewards_train/rejected": -14.151684761047363, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -35.719940185546875, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -42.69109344482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6344940662384033, + "rewards_train/margins": 1.3283653259277344, + "rewards_train/rejected": -2.9628593921661377, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -168.41867065429688, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -188.474853515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.391867637634277, + "rewards_train/margins": -0.744382381439209, + "rewards_train/rejected": -7.647485256195068, + "step": 1159 + }, + { + "epoch": 0.32, + "logps_train/chosen": -192.49081420898438, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -230.26974487304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.249081611633301, + "rewards_train/margins": -0.5221071243286133, + "rewards_train/rejected": -5.7269744873046875, + "step": 1159 + }, + { + "epoch": 0.32, + "learning_rate": 1.2003580010891212e-06, + "loss": 0.473, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -33.12302017211914, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -33.42347717285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.918552041053772, + "rewards_train/margins": 0.030045747756958008, + "rewards_train/rejected": -1.94859778881073, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -194.46966552734375, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -190.03408813476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.99696683883667, + "rewards_train/margins": 3.3564419746398926, + "rewards_train/rejected": -10.353408813476562, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -5.698536396026611, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -27.929759979248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1745411455631256, + "rewards_train/margins": 1.987184852361679, + "rewards_train/rejected": -2.1617259979248047, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -35.50352478027344, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -24.96424102783203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8378524780273438, + "rewards_train/margins": -1.0789283514022827, + "rewards_train/rejected": -0.758924126625061, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -1.3127057552337646, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -36.98077392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09060442447662354, + "rewards_train/margins": 0.5636818110942841, + "rewards_train/rejected": -0.4730773866176605, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -64.01183319091797, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -86.69224548339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0011833191383630037, + "rewards_train/margins": 0.4180412411224097, + "rewards_train/rejected": -0.4192245602607727, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -54.395111083984375, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -126.91270446777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1645110845565796, + "rewards_train/margins": 0.7267594337463379, + "rewards_train/rejected": -1.8912705183029175, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -143.92214965820312, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -202.96722412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4922149181365967, + "rewards_train/margins": 4.354507684707642, + "rewards_train/rejected": -7.846722602844238, + "step": 1160 + }, + { + "epoch": 0.32, + "logps_train/chosen": -101.60044860839844, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -189.8957061767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8100448846817017, + "rewards_train/margins": 8.129525542259216, + "rewards_train/rejected": -8.939570426940918, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -34.432716369628906, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -37.24736404418945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.305771589279175, + "rewards_train/margins": -0.39353513717651367, + "rewards_train/rejected": -1.9122364521026611, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -99.07400512695312, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -191.11920166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2574005126953125, + "rewards_train/margins": 7.604519844055176, + "rewards_train/rejected": -8.861920356750488, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -136.12815856933594, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -240.5124053955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.462815761566162, + "rewards_train/margins": 4.688424587249756, + "rewards_train/rejected": -9.151240348815918, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -47.27866744995117, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -49.194332122802734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.496616840362549, + "rewards_train/margins": -1.2896835803985596, + "rewards_train/rejected": -2.2069332599639893, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -142.23622131347656, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -141.7346649169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4236221313476562, + "rewards_train/margins": 2.1998443603515625, + "rewards_train/rejected": -3.6234664916992188, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -176.53797912597656, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -182.88925170898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.353797912597656, + "rewards_train/margins": 2.585127353668213, + "rewards_train/rejected": -6.938925266265869, + "step": 1161 + }, + { + "epoch": 0.32, + "logps_train/chosen": -42.55247116088867, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -75.59345245361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4427471160888672, + "rewards_train/margins": 2.4415981769561768, + "rewards_train/rejected": -3.884345293045044, + "step": 1161 + }, + { + "epoch": 0.32, + "learning_rate": 1.1977653957647375e-06, + "loss": 0.3925, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -29.777481079101562, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -60.38426208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.14024817943573, + "rewards_train/margins": 2.885677933692932, + "rewards_train/rejected": -4.025926113128662, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -88.75595092773438, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -85.7820053100586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.225595235824585, + "rewards_train/margins": -0.29739463329315186, + "rewards_train/rejected": -1.928200602531433, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -29.429410934448242, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -31.19668960571289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4179410934448242, + "rewards_train/margins": 1.7454779148101807, + "rewards_train/rejected": -2.163419008255005, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -47.05508041381836, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -58.30532455444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48050805926322937, + "rewards_train/margins": 3.175024539232254, + "rewards_train/rejected": -3.6555325984954834, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -73.99986267089844, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -36.647308349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5999863147735596, + "rewards_train/margins": 0.15224456787109375, + "rewards_train/rejected": -1.7522308826446533, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -141.24014282226562, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -131.59176635742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.124014377593994, + "rewards_train/margins": 1.135162353515625, + "rewards_train/rejected": -4.259176731109619, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -9.207371711730957, + "logps_train/ref_chosen": -1.2734375, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -20.091659545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7933934330940247, + "rewards_train/margins": 0.490772545337677, + "rewards_train/rejected": -1.2841659784317017, + "step": 1162 + }, + { + "epoch": 0.32, + "logps_train/chosen": -47.81018829345703, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -49.378353118896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5560188293457031, + "rewards_train/margins": 1.1318166255950928, + "rewards_train/rejected": -2.687835454940796, + "step": 1162 + }, + { + "epoch": 0.33, + "logps_train/chosen": -216.57492065429688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -196.12864685058594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.55749225616455, + "rewards_train/margins": -0.14462757110595703, + "rewards_train/rejected": -8.412864685058594, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -114.59083557128906, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -162.3346405029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.75908362865448, + "rewards_train/margins": 1.6743804216384888, + "rewards_train/rejected": -3.4334640502929688, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -30.611984252929688, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -17.4650936126709, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0174484252929688, + "rewards_train/margins": -0.6475015878677368, + "rewards_train/rejected": -1.369946837425232, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -115.05901336669922, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -127.7734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4559013843536377, + "rewards_train/margins": -0.22855758666992188, + "rewards_train/rejected": -1.2273437976837158, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -26.294628143310547, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -29.482501983642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3388378620147705, + "rewards_train/margins": 0.12191247940063477, + "rewards_train/rejected": -2.4607503414154053, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.765316009521484, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -21.90544319152832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1765315979719162, + "rewards_train/margins": 1.2171377688646317, + "rewards_train/rejected": -1.3936693668365479, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -127.76895141601562, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -161.10472106933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1768951416015625, + "rewards_train/margins": 2.3335771560668945, + "rewards_train/rejected": -5.510472297668457, + "step": 1163 + }, + { + "epoch": 0.33, + "logps_train/chosen": -24.117530822753906, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -32.18584060668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1055030822753906, + "rewards_train/margins": 1.8771436214447021, + "rewards_train/rejected": -2.9826467037200928, + "step": 1163 + }, + { + "epoch": 0.33, + "learning_rate": 1.1951714062922626e-06, + "loss": 0.4193, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -113.74266052246094, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -162.09475708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.174266338348389, + "rewards_train/margins": 0.8352093696594238, + "rewards_train/rejected": -5.0094757080078125, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -5.146063804626465, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -8.32823657989502, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2630438804626465, + "rewards_train/margins": -0.15834522247314453, + "rewards_train/rejected": -0.10469865798950195, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -57.795188903808594, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -82.91699981689453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.029518961906433, + "rewards_train/margins": -0.48781895637512207, + "rewards_train/rejected": -0.541700005531311, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -125.74968719482422, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -126.24176025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.974968671798706, + "rewards_train/margins": 1.2992074489593506, + "rewards_train/rejected": -5.274176120758057, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -9.091438293457031, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -10.233391761779785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2528938353061676, + "rewards_train/margins": 0.18919533491134644, + "rewards_train/rejected": -0.44208917021751404, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.184995651245117, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -36.706390380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29349955916404724, + "rewards_train/margins": 1.5646394789218903, + "rewards_train/rejected": -1.8581390380859375, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -10.270008087158203, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -24.15448760986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7816883325576782, + "rewards_train/margins": 0.30251049995422363, + "rewards_train/rejected": -1.0841988325119019, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -51.102752685546875, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -77.37969207763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2352752685546875, + "rewards_train/margins": 2.3651938438415527, + "rewards_train/rejected": -4.60046911239624, + "step": 1164 + }, + { + "epoch": 0.33, + "logps_train/chosen": -43.474464416503906, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -0.59765625, + "logps_train/rejected": -16.661792755126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9974464774131775, + "rewards_train/margins": 0.6089672446250916, + "rewards_train/rejected": -1.606413722038269, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -3.6862666606903076, + "logps_train/ref_chosen": -0.388671875, + "logps_train/ref_rejected": -0.388671875, + "logps_train/rejected": -4.000467777252197, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32975947856903076, + "rewards_train/margins": 0.031420111656188965, + "rewards_train/rejected": -0.3611795902252197, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -46.85942459106445, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -92.49972534179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08594246208667755, + "rewards_train/margins": 0.8640301078557968, + "rewards_train/rejected": -0.9499725699424744, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -122.9476089477539, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -133.97532653808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.694761037826538, + "rewards_train/margins": 0.4027717113494873, + "rewards_train/rejected": -3.0975327491760254, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -57.29216766357422, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -40.72942352294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5292167663574219, + "rewards_train/margins": 1.2187256813049316, + "rewards_train/rejected": -2.7479424476623535, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -166.5777587890625, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -126.06549072265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.257775783538818, + "rewards_train/margins": -1.601226568222046, + "rewards_train/rejected": -3.6565492153167725, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -114.9039306640625, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -125.8549575805664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4903931617736816, + "rewards_train/margins": -0.054897308349609375, + "rewards_train/rejected": -3.4354958534240723, + "step": 1165 + }, + { + "epoch": 0.33, + "logps_train/chosen": -60.94388198852539, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -36.036033630371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4193882942199707, + "rewards_train/margins": 0.30921506881713867, + "rewards_train/rejected": -2.7286033630371094, + "step": 1165 + }, + { + "epoch": 0.33, + "learning_rate": 1.1925760508268718e-06, + "loss": 0.5667, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -34.95569610595703, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -58.08894348144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2080696821212769, + "rewards_train/margins": 0.4508247375488281, + "rewards_train/rejected": -1.658894419670105, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -15.397231101989746, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -23.164045333862305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8303481340408325, + "rewards_train/margins": 0.84230637550354, + "rewards_train/rejected": -1.6726545095443726, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -135.97698974609375, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -122.70970153808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8476990461349487, + "rewards_train/margins": -0.32672882080078125, + "rewards_train/rejected": -1.5209702253341675, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -32.96750259399414, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -37.978904724121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6717503070831299, + "rewards_train/margins": 1.238640308380127, + "rewards_train/rejected": -2.910390615463257, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -68.78131103515625, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -53.615386962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.153131127357483, + "rewards_train/margins": 1.0959075689315796, + "rewards_train/rejected": -2.2490386962890625, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -27.214557647705078, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -59.32402420043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5214557647705078, + "rewards_train/margins": 2.648446559906006, + "rewards_train/rejected": -4.169902324676514, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -20.902671813964844, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -18.70172119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8715171813964844, + "rewards_train/margins": 0.6158424615859985, + "rewards_train/rejected": -1.487359642982483, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -53.15068817138672, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -40.54042434692383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.240068793296814, + "rewards_train/margins": 0.6514736413955688, + "rewards_train/rejected": -1.8915424346923828, + "step": 1166 + }, + { + "epoch": 0.33, + "logps_train/chosen": -194.7613525390625, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -204.38949584960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.876135230064392, + "rewards_train/margins": 7.012814164161682, + "rewards_train/rejected": -8.888949394226074, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -157.21142578125, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -168.58456420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8211426138877869, + "rewards_train/margins": 3.3873139023780823, + "rewards_train/rejected": -4.208456516265869, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -30.231609344482422, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -66.92144012451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3106609582901, + "rewards_train/margins": 0.5814831256866455, + "rewards_train/rejected": -1.8921440839767456, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -9.883123397827148, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -22.10024642944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20081233978271484, + "rewards_train/margins": 1.0154622793197632, + "rewards_train/rejected": -1.216274619102478, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -181.60403442382812, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -245.12353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7604033946990967, + "rewards_train/margins": 6.151950120925903, + "rewards_train/rejected": -8.912353515625, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -151.250732421875, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -213.91168212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.375073194503784, + "rewards_train/margins": 2.216095209121704, + "rewards_train/rejected": -5.591168403625488, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -21.781076431274414, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -50.7770881652832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7999826669692993, + "rewards_train/margins": 1.2277261018753052, + "rewards_train/rejected": -3.0277087688446045, + "step": 1167 + }, + { + "epoch": 0.33, + "logps_train/chosen": -140.6434326171875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -123.5477294921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.464343547821045, + "rewards_train/margins": -1.059570550918579, + "rewards_train/rejected": -3.404772996902466, + "step": 1167 + }, + { + "epoch": 0.33, + "learning_rate": 1.189979347533303e-06, + "loss": 0.3557, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -140.04757690429688, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -145.28042602539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.7047576904296875, + "rewards_train/margins": 0.8232851028442383, + "rewards_train/rejected": -5.528042793273926, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -118.74324035644531, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -120.7308578491211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2743240594863892, + "rewards_train/margins": 0.3987617492675781, + "rewards_train/rejected": -1.6730858087539673, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -92.63644409179688, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -146.76358032226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6136444807052612, + "rewards_train/margins": 3.412713646888733, + "rewards_train/rejected": -5.026358127593994, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -256.4010925292969, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -74.30134582519531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.540109634399414, + "rewards_train/margins": -5.934974908828735, + "rewards_train/rejected": -2.6051347255706787, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -109.997314453125, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -59.669193267822266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.199731469154358, + "rewards_train/margins": -0.15781211853027344, + "rewards_train/rejected": -1.0419193506240845, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -123.40547180175781, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -191.3637237548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7405471801757812, + "rewards_train/margins": 3.0958251953125, + "rewards_train/rejected": -4.836372375488281, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.819244384765625, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -39.385345458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2287994623184204, + "rewards_train/margins": 1.128485083580017, + "rewards_train/rejected": -2.3572845458984375, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -61.011165618896484, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -56.555397033691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4636166095733643, + "rewards_train/margins": -0.6455769538879395, + "rewards_train/rejected": -2.818039655685425, + "step": 1168 + }, + { + "epoch": 0.33, + "logps_train/chosen": -140.99432373046875, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -187.11355590820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7494324445724487, + "rewards_train/margins": 5.911923050880432, + "rewards_train/rejected": -7.661355495452881, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -8.703047752380371, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -46.43614196777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33280476927757263, + "rewards_train/margins": 1.735809475183487, + "rewards_train/rejected": -2.0686142444610596, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -132.5010986328125, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -161.0179901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6501100063323975, + "rewards_train/margins": 1.7016890048980713, + "rewards_train/rejected": -5.351799011230469, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -22.464523315429688, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -34.20460510253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7402023077011108, + "rewards_train/margins": -0.4572417736053467, + "rewards_train/rejected": -1.2829605340957642, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -32.62580108642578, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -42.94456481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5000801086425781, + "rewards_train/margins": 1.3381264209747314, + "rewards_train/rejected": -2.8382065296173096, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -10.277318000793457, + "logps_train/ref_chosen": -1.0, + "logps_train/ref_rejected": -1.9609375, + "logps_train/rejected": -15.930232048034668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9277318120002747, + "rewards_train/margins": 0.46919769048690796, + "rewards_train/rejected": -1.3969295024871826, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -111.50891876220703, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -186.76144409179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.2758917808532715, + "rewards_train/margins": 2.050252914428711, + "rewards_train/rejected": -7.326144695281982, + "step": 1169 + }, + { + "epoch": 0.33, + "logps_train/chosen": -107.72711181640625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -214.23068237304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.122711181640625, + "rewards_train/margins": 6.400357246398926, + "rewards_train/rejected": -7.523068428039551, + "step": 1169 + }, + { + "epoch": 0.33, + "learning_rate": 1.1873813145857248e-06, + "loss": 0.6942, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -107.74568176269531, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -157.97463989257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4245681762695312, + "rewards_train/margins": 4.87289571762085, + "rewards_train/rejected": -6.297463893890381, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -6.883014678955078, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -8.798823356628418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.016426468268036842, + "rewards_train/margins": -0.13029413111507893, + "rewards_train/rejected": 0.11386766284704208, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -10.073749542236328, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -32.738712310791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1363750547170639, + "rewards_train/margins": 1.4352463334798813, + "rewards_train/rejected": -1.2988712787628174, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -8.179851531982422, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -2.796875, + "logps_train/rejected": -26.757543563842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6117351651191711, + "rewards_train/margins": 1.784331738948822, + "rewards_train/rejected": -2.396066904067993, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -31.921005249023438, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -60.05704879760742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3546005487442017, + "rewards_train/margins": 2.0136042833328247, + "rewards_train/rejected": -3.3682048320770264, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -17.17503547668457, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -11.876107215881348, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0175036191940308, + "rewards_train/margins": -0.5142678618431091, + "rewards_train/rejected": -0.5032357573509216, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -142.27536010742188, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -148.56585693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9275360107421875, + "rewards_train/margins": 1.8790497779846191, + "rewards_train/rejected": -4.806585788726807, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -116.63892364501953, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -134.0509033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.163892388343811, + "rewards_train/margins": 2.191197991371155, + "rewards_train/rejected": -3.355090379714966, + "step": 1170 + }, + { + "epoch": 0.33, + "logps_train/chosen": -21.243453979492188, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -16.346660614013672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3462203741073608, + "rewards_train/margins": -0.10999178886413574, + "rewards_train/rejected": -1.236228585243225, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -164.24636840820312, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -274.5869140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.774636745452881, + "rewards_train/margins": 6.484055042266846, + "rewards_train/rejected": -14.258691787719727, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -43.623321533203125, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -77.73017883300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4123321771621704, + "rewards_train/margins": 2.0106858015060425, + "rewards_train/rejected": -3.423017978668213, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -127.34291076660156, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -151.79164123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0842912197113037, + "rewards_train/margins": 3.444873094558716, + "rewards_train/rejected": -6.5291643142700195, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -15.579133987426758, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -25.886524200439453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0047883987426758, + "rewards_train/margins": -0.40363597869873047, + "rewards_train/rejected": -0.6011524200439453, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -132.8717803955078, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -192.2318115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3871780335903168, + "rewards_train/margins": 5.136003404855728, + "rewards_train/rejected": -5.523181438446045, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -186.19515991210938, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -207.7774200439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.119515895843506, + "rewards_train/margins": 1.8582262992858887, + "rewards_train/rejected": -7.9777421951293945, + "step": 1171 + }, + { + "epoch": 0.33, + "logps_train/chosen": -175.86314392089844, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -315.3556213378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5863144397735596, + "rewards_train/margins": 8.44924807548523, + "rewards_train/rejected": -11.035562515258789, + "step": 1171 + }, + { + "epoch": 0.33, + "learning_rate": 1.1847819701676137e-06, + "loss": 0.2793, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -180.32884216308594, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -198.97613525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.282884120941162, + "rewards_train/margins": 1.3147296905517578, + "rewards_train/rejected": -6.59761381149292, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -47.14099884033203, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -35.904754638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1891000270843506, + "rewards_train/margins": 0.30762553215026855, + "rewards_train/rejected": -2.496725559234619, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -92.71037292480469, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -109.26513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8710373044013977, + "rewards_train/margins": 0.6554763913154602, + "rewards_train/rejected": -1.526513695716858, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -24.57333755493164, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -20.59085464477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.557333767414093, + "rewards_train/margins": 1.104876697063446, + "rewards_train/rejected": -1.662210464477539, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -0.28094449639320374, + "logps_train/ref_chosen": -1.3125, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -16.55283546447754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10315555334091187, + "rewards_train/margins": 1.2459390759468079, + "rewards_train/rejected": -1.142783522605896, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -21.204788208007812, + "logps_train/ref_chosen": -3.453125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -33.61528015136719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.775166392326355, + "rewards_train/margins": -0.48863840103149414, + "rewards_train/rejected": -1.2865279912948608, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -60.835994720458984, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -90.39451599121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016400529071688652, + "rewards_train/margins": 2.955852223560214, + "rewards_train/rejected": -2.9394516944885254, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -5.604833126068115, + "logps_train/ref_chosen": -1.6796875, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -12.617403030395508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39251455664634705, + "rewards_train/margins": 0.4536007344722748, + "rewards_train/rejected": -0.8461152911186218, + "step": 1172 + }, + { + "epoch": 0.33, + "logps_train/chosen": -89.65945434570312, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -89.94822692871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08405456691980362, + "rewards_train/margins": 0.02887725830078125, + "rewards_train/rejected": 0.05517730861902237, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -23.369028091430664, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -40.76995849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9369028210639954, + "rewards_train/margins": 0.8400930762290955, + "rewards_train/rejected": -1.7769958972930908, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -21.88907241821289, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -37.00249099731445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.338907241821289, + "rewards_train/margins": 0.47384190559387207, + "rewards_train/rejected": -1.8127491474151611, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -31.47966194152832, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -20.175983428955078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8604662418365479, + "rewards_train/margins": -0.43974292278289795, + "rewards_train/rejected": -1.42072331905365, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -2.4238247871398926, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -5.65146541595459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10878872871398926, + "rewards_train/margins": 0.040732815861701965, + "rewards_train/rejected": -0.14952154457569122, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.37531280517578, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -41.79970932006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1937812566757202, + "rewards_train/margins": 1.6361898183822632, + "rewards_train/rejected": -2.8299710750579834, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -19.64921760559082, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -86.96575164794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1899217665195465, + "rewards_train/margins": 4.58165368437767, + "rewards_train/rejected": -4.771575450897217, + "step": 1173 + }, + { + "epoch": 0.33, + "logps_train/chosen": -10.240508079528809, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -12.266012191772461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6928008198738098, + "rewards_train/margins": -0.45369960367679596, + "rewards_train/rejected": -0.23910121619701385, + "step": 1173 + }, + { + "epoch": 0.33, + "learning_rate": 1.1821813324716251e-06, + "loss": 0.4699, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -140.16168212890625, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -262.039306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.866168260574341, + "rewards_train/margins": 7.037762403488159, + "rewards_train/rejected": -10.9039306640625, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -2.3074376583099365, + "logps_train/ref_chosen": -0.328125, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -16.091733932495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19793127477169037, + "rewards_train/margins": 1.0440546423196793, + "rewards_train/rejected": -1.2419859170913696, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -54.72254180908203, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -82.99371337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2972543239593506, + "rewards_train/margins": 1.7271173000335693, + "rewards_train/rejected": -4.02437162399292, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -22.739456176757812, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -1.25, + "logps_train/rejected": -8.813108444213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5364456176757812, + "rewards_train/margins": 0.21986526250839233, + "rewards_train/rejected": -0.7563108801841736, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -11.879379272460938, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -14.88459300994873, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2254379242658615, + "rewards_train/margins": 0.7911463528871536, + "rewards_train/rejected": -1.0165842771530151, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -1.7559254169464111, + "logps_train/ref_chosen": -1.1953125, + "logps_train/ref_rejected": -0.51953125, + "logps_train/rejected": -0.2961371839046478, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05606129392981529, + "rewards_train/margins": -0.07840070128440857, + "rewards_train/rejected": 0.022339407354593277, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -9.91518497467041, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -14.214384078979492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43526849150657654, + "rewards_train/margins": 0.14866992831230164, + "rewards_train/rejected": -0.5839384198188782, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -222.09823608398438, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -226.73280334472656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.909823417663574, + "rewards_train/margins": -0.9365429878234863, + "rewards_train/rejected": -7.973280429840088, + "step": 1174 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.755199432373047, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -15.987991333007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8255199790000916, + "rewards_train/margins": -0.1892208456993103, + "rewards_train/rejected": -0.6362991333007812, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -131.56765747070312, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -209.88241577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.006765842437744, + "rewards_train/margins": 4.681475639343262, + "rewards_train/rejected": -6.688241481781006, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -82.05012512207031, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -59.25, + "logps_train/rejected": -117.69898986816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9050124883651733, + "rewards_train/margins": 3.939886689186096, + "rewards_train/rejected": -5.8448991775512695, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -71.18670654296875, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -61.83551788330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.793670654296875, + "rewards_train/margins": -0.28511887788772583, + "rewards_train/rejected": -0.5085517764091492, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -179.47445678710938, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -200.19918823242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.697445869445801, + "rewards_train/margins": 0.5224728584289551, + "rewards_train/rejected": -7.219918727874756, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -248.57958984375, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -200.15325927734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.357958793640137, + "rewards_train/margins": -0.5426325798034668, + "rewards_train/rejected": -7.81532621383667, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -53.622955322265625, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -50.38942337036133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.831045627593994, + "rewards_train/margins": 0.6453967094421387, + "rewards_train/rejected": -4.476442337036133, + "step": 1175 + }, + { + "epoch": 0.33, + "logps_train/chosen": -176.98300170898438, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -235.75665283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.098300457000732, + "rewards_train/margins": 7.827365398406982, + "rewards_train/rejected": -12.925665855407715, + "step": 1175 + }, + { + "epoch": 0.33, + "learning_rate": 1.1795794196994655e-06, + "loss": 0.4754, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -1.7076785564422607, + "logps_train/ref_chosen": -0.416015625, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -10.376666069030762, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12916629016399384, + "rewards_train/margins": 0.42725034058094025, + "rewards_train/rejected": -0.5564166307449341, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -134.32534790039062, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -115.63934326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2825348377227783, + "rewards_train/margins": 1.1813995838165283, + "rewards_train/rejected": -2.4639344215393066, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -187.60916137695312, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -256.39422607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.96091628074646, + "rewards_train/margins": 5.378506898880005, + "rewards_train/rejected": -9.339423179626465, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -48.7410888671875, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -80.32319641113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.89910888671875, + "rewards_train/margins": 1.8332109451293945, + "rewards_train/rejected": -4.7323198318481445, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -271.13916015625, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -271.9471130371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.713915824890137, + "rewards_train/margins": 2.180795669555664, + "rewards_train/rejected": -11.8947114944458, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -140.812255859375, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -140.42022705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3312256336212158, + "rewards_train/margins": 5.0607969760894775, + "rewards_train/rejected": -6.392022609710693, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -99.3795394897461, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -45.01028823852539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1379539966583252, + "rewards_train/margins": 2.081824779510498, + "rewards_train/rejected": -3.2197787761688232, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -46.17816925048828, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -50.92271041870117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.180316925048828, + "rewards_train/margins": 0.4369542598724365, + "rewards_train/rejected": -2.6172711849212646, + "step": 1176 + }, + { + "epoch": 0.33, + "logps_train/chosen": -49.478485107421875, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -45.058448791503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1353485584259033, + "rewards_train/margins": 1.064246416091919, + "rewards_train/rejected": -3.1995949745178223, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -29.823577880859375, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -58.7027587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1323578357696533, + "rewards_train/margins": 1.0754179954528809, + "rewards_train/rejected": -3.207775831222534, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -123.19767761230469, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -124.92170715332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9697678089141846, + "rewards_train/margins": 1.3724029064178467, + "rewards_train/rejected": -3.3421707153320312, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -210.27105712890625, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -248.39479064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.127105712890625, + "rewards_train/margins": 2.6123733520507812, + "rewards_train/rejected": -8.739479064941406, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -201.264404296875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -208.0, + "logps_train/rejected": -298.53857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.026440620422363, + "rewards_train/margins": 2.0274171829223633, + "rewards_train/rejected": -9.053857803344727, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -141.99102783203125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -174.32632446289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.799102783203125, + "rewards_train/margins": 1.5335297584533691, + "rewards_train/rejected": -4.332632541656494, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -217.05081176757812, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -199.41558837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.305081367492676, + "rewards_train/margins": 0.3864774703979492, + "rewards_train/rejected": -8.691558837890625, + "step": 1177 + }, + { + "epoch": 0.33, + "logps_train/chosen": -30.81623077392578, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -25.801979064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9441231489181519, + "rewards_train/margins": 0.08294975757598877, + "rewards_train/rejected": -2.0270729064941406, + "step": 1177 + }, + { + "epoch": 0.33, + "learning_rate": 1.1769762500617647e-06, + "loss": 0.2519, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -141.27383422851562, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -164.0343017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.577383518218994, + "rewards_train/margins": 1.8260469436645508, + "rewards_train/rejected": -6.403430461883545, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -8.589408874511719, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -4.888143062591553, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5917534232139587, + "rewards_train/margins": -0.5998141169548035, + "rewards_train/rejected": 0.008060693740844727, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -12.296635627746582, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -34.236793518066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7765385508537292, + "rewards_train/margins": 1.0596408247947693, + "rewards_train/rejected": -1.8361793756484985, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -5.550769329071045, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -9.06502914428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026951933279633522, + "rewards_train/margins": 0.4389259871095419, + "rewards_train/rejected": -0.4658779203891754, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -74.36944580078125, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -190.5855255126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.086944580078125, + "rewards_train/margins": 7.871607780456543, + "rewards_train/rejected": -8.958552360534668, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -172.03756713867188, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -107.14168548583984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.703756809234619, + "rewards_train/margins": -1.3395881652832031, + "rewards_train/rejected": -2.364168643951416, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -83.47026062011719, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -107.49809265136719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8970260620117188, + "rewards_train/margins": 0.7527831792831421, + "rewards_train/rejected": -1.6498092412948608, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -39.136138916015625, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -82.51163482666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8261139392852783, + "rewards_train/margins": 2.200049638748169, + "rewards_train/rejected": -4.026163578033447, + "step": 1178 + }, + { + "epoch": 0.33, + "logps_train/chosen": -63.064762115478516, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -33.98684310913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5314762592315674, + "rewards_train/margins": 0.25470805168151855, + "rewards_train/rejected": -2.786184310913086, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -191.37673950195312, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -229.72720336914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.0376739501953125, + "rewards_train/margins": 1.8350467681884766, + "rewards_train/rejected": -8.872720718383789, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -169.79656982421875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -201.5658416748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.679656982421875, + "rewards_train/margins": 2.376927375793457, + "rewards_train/rejected": -6.056584358215332, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -148.5360565185547, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -156.6229248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.503605604171753, + "rewards_train/margins": 0.6086869239807129, + "rewards_train/rejected": -3.112292528152466, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -160.89390563964844, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -238.55429077148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6893906593322754, + "rewards_train/margins": 5.766038417816162, + "rewards_train/rejected": -8.455429077148438, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -149.33560180664062, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -205.9499053955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1835601329803467, + "rewards_train/margins": 3.8114306926727295, + "rewards_train/rejected": -5.994990825653076, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -122.67030334472656, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -242.749755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5170304775238037, + "rewards_train/margins": 2.4579451084136963, + "rewards_train/rejected": -5.9749755859375, + "step": 1179 + }, + { + "epoch": 0.33, + "logps_train/chosen": -111.994384765625, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -259.47216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0494384765625, + "rewards_train/margins": 9.697778701782227, + "rewards_train/rejected": -10.747217178344727, + "step": 1179 + }, + { + "epoch": 0.33, + "learning_rate": 1.1743718417779516e-06, + "loss": 0.3374, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -67.48023986816406, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -111.39607238769531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0480239391326904, + "rewards_train/margins": -0.5584166049957275, + "rewards_train/rejected": -2.489607334136963, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -28.926586151123047, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -110.6963882446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9301586151123047, + "rewards_train/margins": 2.789480209350586, + "rewards_train/rejected": -4.719638824462891, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -23.074560165405273, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -23.23908233642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4324560165405273, + "rewards_train/margins": 0.12270224094390869, + "rewards_train/rejected": -1.555158257484436, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.375642776489258, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -16.258590698242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4188142716884613, + "rewards_train/margins": -0.011705189943313599, + "rewards_train/rejected": -0.4071090817451477, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -257.7550048828125, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -270.6504211425781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.07550048828125, + "rewards_train/margins": -0.21045780181884766, + "rewards_train/rejected": -8.865042686462402, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -166.84072875976562, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -245.184326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4840729236602783, + "rewards_train/margins": 6.034359693527222, + "rewards_train/rejected": -8.5184326171875, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.022144317626953, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -24.107757568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17278556525707245, + "rewards_train/margins": 1.002311334013939, + "rewards_train/rejected": -0.8295257687568665, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -52.260108947753906, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -110.94640350341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2239891141653061, + "rewards_train/margins": 4.018629416823387, + "rewards_train/rejected": -3.794640302658081, + "step": 1180 + }, + { + "epoch": 0.33, + "logps_train/chosen": -5.121853828430176, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -5.125268936157227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2653103768825531, + "rewards_train/margins": 0.0003415346145629883, + "rewards_train/rejected": -0.2656519114971161, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -86.85076904296875, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -122.26676177978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6850769519805908, + "rewards_train/margins": 2.5415995121002197, + "rewards_train/rejected": -4.2266764640808105, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -13.814205169677734, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -26.67844009399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05607948452234268, + "rewards_train/margins": 1.6551735177636147, + "rewards_train/rejected": -1.599094033241272, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -59.50313186645508, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -143.25787353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.087813138961792, + "rewards_train/margins": 2.637974500656128, + "rewards_train/rejected": -5.72578763961792, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -16.58373260498047, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -35.41680908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3271232545375824, + "rewards_train/margins": 0.6520576775074005, + "rewards_train/rejected": -0.9791809320449829, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -36.62508773803711, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -72.91239929199219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.037508726119995, + "rewards_train/margins": -0.6212687492370605, + "rewards_train/rejected": -1.4162399768829346, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -91.0541763305664, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -139.60968017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5554176568984985, + "rewards_train/margins": 0.40555036067962646, + "rewards_train/rejected": -1.960968017578125, + "step": 1181 + }, + { + "epoch": 0.33, + "logps_train/chosen": -11.634243965148926, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -21.635042190551758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6040493845939636, + "rewards_train/margins": 0.628204882144928, + "rewards_train/rejected": -1.2322542667388916, + "step": 1181 + }, + { + "epoch": 0.33, + "learning_rate": 1.1717662130761223e-06, + "loss": 0.4352, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -81.36611938476562, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -106.13945007324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.41338807344436646, + "rewards_train/margins": 0.9773330688476562, + "rewards_train/rejected": -0.5639449954032898, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -87.12467193603516, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -87.95072937011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18753281235694885, + "rewards_train/margins": 0.0826057493686676, + "rewards_train/rejected": 0.10492706298828125, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -32.806156158447266, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -38.69538879394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8181155920028687, + "rewards_train/margins": 0.28892338275909424, + "rewards_train/rejected": -2.107038974761963, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -22.319774627685547, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -33.417877197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7819774746894836, + "rewards_train/margins": 1.522310197353363, + "rewards_train/rejected": -2.3042876720428467, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -134.4534912109375, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -171.98910522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2453491687774658, + "rewards_train/margins": 5.653561353683472, + "rewards_train/rejected": -6.8989105224609375, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -61.6025390625, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -20.938159942626953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.910253882408142, + "rewards_train/margins": -0.05550038814544678, + "rewards_train/rejected": -1.8547534942626953, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -77.87947082519531, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -129.62957763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8879470825195312, + "rewards_train/margins": 2.8750107288360596, + "rewards_train/rejected": -3.762957811355591, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -131.3170623779297, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -178.4402618408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.431706428527832, + "rewards_train/margins": 1.6623196601867676, + "rewards_train/rejected": -6.0940260887146, + "step": 1182 + }, + { + "epoch": 0.33, + "logps_train/chosen": -75.88358306884766, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -45.942771911621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2383583784103394, + "rewards_train/margins": 2.2684189081192017, + "rewards_train/rejected": -3.506777286529541, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -2.247850179672241, + "logps_train/ref_chosen": -2.5625, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -5.890334129333496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03146498277783394, + "rewards_train/margins": 0.14862339571118355, + "rewards_train/rejected": -0.11715841293334961, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -100.78631591796875, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -195.47079467773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7786316275596619, + "rewards_train/margins": -0.7315521575510502, + "rewards_train/rejected": -0.04707947000861168, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -23.878326416015625, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -32.69122314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7440826892852783, + "rewards_train/margins": 1.1031646728515625, + "rewards_train/rejected": -2.847247362136841, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -32.12357711791992, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -144.37109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0123577117919922, + "rewards_train/margins": 6.774751663208008, + "rewards_train/rejected": -7.787109375, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -10.11102294921875, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -16.894187927246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.426727294921875, + "rewards_train/margins": 0.5033165216445923, + "rewards_train/rejected": -0.9300438165664673, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -9.809986114501953, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -25.595800399780273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7263111472129822, + "rewards_train/margins": 0.9145188927650452, + "rewards_train/rejected": -1.6408300399780273, + "step": 1183 + }, + { + "epoch": 0.33, + "logps_train/chosen": -18.44368553161621, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -28.350791931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06936855614185333, + "rewards_train/margins": 1.328210636973381, + "rewards_train/rejected": -1.3975791931152344, + "step": 1183 + }, + { + "epoch": 0.33, + "learning_rate": 1.1691593821929147e-06, + "loss": 0.3666, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -15.257611274719238, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -37.49474334716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7976361513137817, + "rewards_train/margins": 1.2768381834030151, + "rewards_train/rejected": -2.074474334716797, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -170.7330322265625, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -148.07760620117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5266968011856079, + "rewards_train/margins": 3.1344574689865112, + "rewards_train/rejected": -2.6077606678009033, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -181.02874755859375, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -265.0129089355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.852874755859375, + "rewards_train/margins": 5.5484161376953125, + "rewards_train/rejected": -12.401290893554688, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -50.56452941894531, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -39.458656311035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9189529418945312, + "rewards_train/margins": 0.0019127130508422852, + "rewards_train/rejected": -1.9208656549453735, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -11.405339241027832, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -12.306682586669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40928393602371216, + "rewards_train/margins": 0.23388433456420898, + "rewards_train/rejected": -0.6431682705879211, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -24.915027618408203, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -45.89413833618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4915027618408203, + "rewards_train/margins": 3.197911024093628, + "rewards_train/rejected": -3.6894137859344482, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -5.963106155395508, + "logps_train/ref_chosen": -1.59375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -16.013486862182617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4369356334209442, + "rewards_train/margins": 0.05816304683685303, + "rewards_train/rejected": -0.49509868025779724, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -12.022269248962402, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -12.742013931274414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9491019248962402, + "rewards_train/margins": -0.017087996006011963, + "rewards_train/rejected": -0.9320139288902283, + "step": 1184 + }, + { + "epoch": 0.33, + "logps_train/chosen": -228.96646118164062, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -331.0285949707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.896646022796631, + "rewards_train/margins": 5.906213283538818, + "rewards_train/rejected": -12.80285930633545, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -179.20806884765625, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -243.65802001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.320806980133057, + "rewards_train/margins": 4.744995594024658, + "rewards_train/rejected": -10.065802574157715, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -7.565834045410156, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -20.57022476196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32845839858055115, + "rewards_train/margins": 1.016064077615738, + "rewards_train/rejected": -1.344522476196289, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -54.69996643066406, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -54.972618103027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7199966311454773, + "rewards_train/margins": 0.027265191078186035, + "rewards_train/rejected": -0.7472618222236633, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -112.21714782714844, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -138.06106567382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5217148065567017, + "rewards_train/margins": 0.1843917965888977, + "rewards_train/rejected": -0.7061066031455994, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -110.61228942871094, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -43.97275161743164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.411228895187378, + "rewards_train/margins": -0.7264536619186401, + "rewards_train/rejected": -1.6847752332687378, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -107.16851806640625, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -151.14991760253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.116851806640625, + "rewards_train/margins": 1.8981399536132812, + "rewards_train/rejected": -3.0149917602539062, + "step": 1185 + }, + { + "epoch": 0.33, + "logps_train/chosen": -188.615234375, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -217.63812255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.761523485183716, + "rewards_train/margins": 3.4022886753082275, + "rewards_train/rejected": -6.163812160491943, + "step": 1185 + }, + { + "epoch": 0.33, + "learning_rate": 1.1665513673733813e-06, + "loss": 0.3666, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -31.106393814086914, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -61.02643966674805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6293894052505493, + "rewards_train/margins": 1.685754656791687, + "rewards_train/rejected": -3.3151440620422363, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -139.7738037109375, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -195.63836669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.077380657196045, + "rewards_train/margins": 3.336456298828125, + "rewards_train/rejected": -7.41383695602417, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -86.70185852050781, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -120.36344909667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9201858639717102, + "rewards_train/margins": 0.16615909337997437, + "rewards_train/rejected": -1.0863449573516846, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -165.6266326904297, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -169.27645874023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.062663555145264, + "rewards_train/margins": -0.43501758575439453, + "rewards_train/rejected": -3.627645969390869, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -196.45208740234375, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -135.885986328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.745208740234375, + "rewards_train/margins": -1.056610107421875, + "rewards_train/rejected": -5.6885986328125, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -0.36597275733947754, + "logps_train/ref_chosen": -0.6484375, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -2.5349631309509277, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028246475383639336, + "rewards_train/margins": 0.06611778773367405, + "rewards_train/rejected": -0.037871312350034714, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -121.32147979736328, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -182.8946990966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9321479797363281, + "rewards_train/margins": 3.1573219299316406, + "rewards_train/rejected": -5.089469909667969, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -63.63862991333008, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -123.37133026123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8888630867004395, + "rewards_train/margins": 3.7982702255249023, + "rewards_train/rejected": -6.687133312225342, + "step": 1186 + }, + { + "epoch": 0.33, + "logps_train/chosen": -28.23543930053711, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -69.01709747314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.173543930053711, + "rewards_train/margins": 0.853165864944458, + "rewards_train/rejected": -2.026709794998169, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -111.99742126464844, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -123.09309387207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5497421026229858, + "rewards_train/margins": 2.8595672845840454, + "rewards_train/rejected": -4.409309387207031, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -122.73419189453125, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -183.73670959472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.773419141769409, + "rewards_train/margins": 4.800251722335815, + "rewards_train/rejected": -7.573670864105225, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -87.88105773925781, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -121.0764389038086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9881057739257812, + "rewards_train/margins": 0.5695381164550781, + "rewards_train/rejected": -2.5576438903808594, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -339.64361572265625, + "logps_train/ref_chosen": -206.0, + "logps_train/ref_rejected": -219.0, + "logps_train/rejected": -351.2965087890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.364361763000488, + "rewards_train/margins": -0.13471031188964844, + "rewards_train/rejected": -13.22965145111084, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -106.61670684814453, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -177.02529907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6616706848144531, + "rewards_train/margins": 2.440859317779541, + "rewards_train/rejected": -4.102530002593994, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -107.8521499633789, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -135.8646697998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4352149963378906, + "rewards_train/margins": 2.401252269744873, + "rewards_train/rejected": -5.836467266082764, + "step": 1187 + }, + { + "epoch": 0.33, + "logps_train/chosen": -114.40258026123047, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -200.3531494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.040258027613162994, + "rewards_train/margins": 5.39505710452795, + "rewards_train/rejected": -5.435315132141113, + "step": 1187 + }, + { + "epoch": 0.33, + "learning_rate": 1.1639421868708612e-06, + "loss": 0.3523, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -150.28451538085938, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -200.21755981445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4784514904022217, + "rewards_train/margins": 6.293304681777954, + "rewards_train/rejected": -9.771756172180176, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -130.8634490966797, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -159.05453491210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.986345052719116, + "rewards_train/margins": 2.6191084384918213, + "rewards_train/rejected": -5.6054534912109375, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -138.30923461914062, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -200.38265991210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3309234380722046, + "rewards_train/margins": 5.507342457771301, + "rewards_train/rejected": -6.838265895843506, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -67.4390869140625, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -132.67303466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.843908667564392, + "rewards_train/margins": 0.12339484691619873, + "rewards_train/rejected": -1.9673035144805908, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -23.946842193603516, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -29.219085693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8071842193603516, + "rewards_train/margins": 1.3272244930267334, + "rewards_train/rejected": -2.134408712387085, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -87.51287841796875, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -85.61168670654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.301287889480591, + "rewards_train/margins": 0.509880781173706, + "rewards_train/rejected": -2.811168670654297, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -39.898780822753906, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -44.927066802978516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9773781299591064, + "rewards_train/margins": -0.9096713066101074, + "rewards_train/rejected": -2.067706823348999, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -65.92816162109375, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -125.08984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.517816185951233, + "rewards_train/margins": 2.2911683320999146, + "rewards_train/rejected": -3.8089845180511475, + "step": 1188 + }, + { + "epoch": 0.33, + "logps_train/chosen": -96.87525939941406, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -172.23394775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0375258922576904, + "rewards_train/margins": 5.835869073867798, + "rewards_train/rejected": -7.873394966125488, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -131.4097137451172, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -120.13622283935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.290971517562866, + "rewards_train/margins": 2.022650957107544, + "rewards_train/rejected": -5.31362247467041, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -96.75748443603516, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -118.91249084472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0257484912872314, + "rewards_train/margins": 2.1155006885528564, + "rewards_train/rejected": -3.141249179840088, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -63.20155334472656, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -86.7387466430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.320155382156372, + "rewards_train/margins": 2.3787195682525635, + "rewards_train/rejected": -4.6988749504089355, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -30.479726791381836, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -36.14103698730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7229726910591125, + "rewards_train/margins": 2.589568555355072, + "rewards_train/rejected": -3.3125412464141846, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -23.071901321411133, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -10.681547164916992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2571901082992554, + "rewards_train/margins": -0.5952853560447693, + "rewards_train/rejected": -0.6619047522544861, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -159.4264373779297, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -109.08306884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.092643737792969, + "rewards_train/margins": 0.01566314697265625, + "rewards_train/rejected": -7.108306884765625, + "step": 1189 + }, + { + "epoch": 0.33, + "logps_train/chosen": -59.8790168762207, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -268.91278076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7379016876220703, + "rewards_train/margins": 9.853376388549805, + "rewards_train/rejected": -11.591278076171875, + "step": 1189 + }, + { + "epoch": 0.33, + "learning_rate": 1.161331858946851e-06, + "loss": 0.3051, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -103.68013000488281, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -121.44485473632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8680130243301392, + "rewards_train/margins": 3.4764727354049683, + "rewards_train/rejected": -4.344485759735107, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -17.336734771728516, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -39.97623825073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5211734771728516, + "rewards_train/margins": 0.8389503955841064, + "rewards_train/rejected": -1.360123872756958, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -30.824922561645508, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -49.142024993896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6824922561645508, + "rewards_train/margins": 1.8692102432250977, + "rewards_train/rejected": -2.5517024993896484, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -155.41580200195312, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -191.73797607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5415802001953125, + "rewards_train/margins": 5.032217502593994, + "rewards_train/rejected": -6.573797702789307, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -106.53701782226562, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -111.81685638427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2537018060684204, + "rewards_train/margins": 2.227983832359314, + "rewards_train/rejected": -3.4816856384277344, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -37.4283332824707, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -48.009605407714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.780333399772644, + "rewards_train/margins": 1.733127236366272, + "rewards_train/rejected": -3.513460636138916, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -14.884611129760742, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -28.499725341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8353361487388611, + "rewards_train/margins": 0.7271364331245422, + "rewards_train/rejected": -1.5624725818634033, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -176.63565063476562, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -221.23385620117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.0135650634765625, + "rewards_train/margins": 2.60982084274292, + "rewards_train/rejected": -7.623385906219482, + "step": 1190 + }, + { + "epoch": 0.33, + "logps_train/chosen": -85.70388793945312, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -85.55886840820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9203888177871704, + "rewards_train/margins": -0.014501988887786865, + "rewards_train/rejected": -0.9058868288993835, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -76.91859436035156, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -63.70569610595703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6668593883514404, + "rewards_train/margins": -0.8962897062301636, + "rewards_train/rejected": -1.7705696821212769, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -15.164929389953613, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -26.041988372802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9883679747581482, + "rewards_train/margins": 0.8095808625221252, + "rewards_train/rejected": -1.7979488372802734, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -22.94884490966797, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -49.982688903808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0792596340179443, + "rewards_train/margins": -0.5559906959533691, + "rewards_train/rejected": -1.5232689380645752, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -179.84796142578125, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -163.20761108398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.384796142578125, + "rewards_train/margins": -0.5640349388122559, + "rewards_train/rejected": -4.820761203765869, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -23.46918487548828, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -1.5390625, + "logps_train/rejected": -29.712276458740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9344184994697571, + "rewards_train/margins": 1.8829030394554138, + "rewards_train/rejected": -2.817321538925171, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -17.36530113220215, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -21.747169494628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3302801251411438, + "rewards_train/margins": 0.4694368243217468, + "rewards_train/rejected": -0.7997169494628906, + "step": 1191 + }, + { + "epoch": 0.33, + "logps_train/chosen": -352.0318298339844, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -329.87774658203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -15.303183555603027, + "rewards_train/margins": -1.4154090881347656, + "rewards_train/rejected": -13.887774467468262, + "step": 1191 + }, + { + "epoch": 0.33, + "learning_rate": 1.1587204018708788e-06, + "loss": 0.4909, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -40.4320068359375, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -65.8819580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.855700671672821, + "rewards_train/margins": 3.2824953198432922, + "rewards_train/rejected": -4.138195991516113, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -60.37717819213867, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -62.126243591308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.687717914581299, + "rewards_train/margins": -1.8750935196876526, + "rewards_train/rejected": -0.8126243948936462, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -9.378246307373047, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -21.394973754882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3940746486186981, + "rewards_train/margins": 1.323547750711441, + "rewards_train/rejected": -1.7176223993301392, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -35.594940185546875, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -28.389808654785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7219940423965454, + "rewards_train/margins": 1.788861870765686, + "rewards_train/rejected": -2.5108559131622314, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -149.18649291992188, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -241.56715393066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.3186492919921875, + "rewards_train/margins": 5.238066673278809, + "rewards_train/rejected": -10.556715965270996, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -38.86433410644531, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -69.80670928955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.411433458328247, + "rewards_train/margins": 2.019237518310547, + "rewards_train/rejected": -3.430670976638794, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -14.056124687194824, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -20.67840576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9368624687194824, + "rewards_train/margins": 0.6122281551361084, + "rewards_train/rejected": -1.5490906238555908, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -12.658367156982422, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -20.877079010009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5283367037773132, + "rewards_train/margins": 0.37812119722366333, + "rewards_train/rejected": -0.9064579010009766, + "step": 1192 + }, + { + "epoch": 0.33, + "logps_train/chosen": -39.47962188720703, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -99.14096069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2104623317718506, + "rewards_train/margins": 0.7036337852478027, + "rewards_train/rejected": -2.9140961170196533, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -57.02577590942383, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -93.50154113769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.640077590942383, + "rewards_train/margins": 2.797576427459717, + "rewards_train/rejected": -6.4376540184021, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -111.52845764160156, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -148.99929809570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.702845811843872, + "rewards_train/margins": 1.047084093093872, + "rewards_train/rejected": -2.749929904937744, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -129.73599243164062, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -182.98243713378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9735991954803467, + "rewards_train/margins": 3.8746445178985596, + "rewards_train/rejected": -6.848243713378906, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -12.921537399291992, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -5.965893268585205, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4171537458896637, + "rewards_train/margins": -0.4424394201487303, + "rewards_train/rejected": 0.02528567425906658, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -126.49414825439453, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -177.45083618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1994149684906006, + "rewards_train/margins": 1.245668649673462, + "rewards_train/rejected": -4.4450836181640625, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -50.555633544921875, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -63.3397331237793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9430633783340454, + "rewards_train/margins": 0.21591007709503174, + "rewards_train/rejected": -2.158973455429077, + "step": 1193 + }, + { + "epoch": 0.33, + "logps_train/chosen": -38.58617401123047, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -30.150863647460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8086174726486206, + "rewards_train/margins": 0.48771893978118896, + "rewards_train/rejected": -2.2963364124298096, + "step": 1193 + }, + { + "epoch": 0.33, + "learning_rate": 1.1561078339203758e-06, + "loss": 0.4106, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -111.88784790039062, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -142.39788818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1887848377227783, + "rewards_train/margins": 2.4010040760040283, + "rewards_train/rejected": -3.5897889137268066, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -129.88604736328125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -234.23858642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.738604784011841, + "rewards_train/margins": 5.685253858566284, + "rewards_train/rejected": -8.423858642578125, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -292.68133544921875, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -249.84083557128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.168133735656738, + "rewards_train/margins": -1.584050178527832, + "rewards_train/rejected": -9.584083557128906, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -6.722324848175049, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -39.551944732666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2597324848175049, + "rewards_train/margins": 1.3829619884490967, + "rewards_train/rejected": -1.6426944732666016, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -211.38858032226562, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -252.5187225341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.038857936859131, + "rewards_train/margins": 4.513014316558838, + "rewards_train/rejected": -11.551872253417969, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -77.86170196533203, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -117.85556030273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06382980197668076, + "rewards_train/margins": 1.849385879933834, + "rewards_train/rejected": -1.7855560779571533, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -15.346136093139648, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -121.34723663330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44086360931396484, + "rewards_train/margins": 6.093860149383545, + "rewards_train/rejected": -6.53472375869751, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -115.97227478027344, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -41.05238342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9972274899482727, + "rewards_train/margins": 1.120510995388031, + "rewards_train/rejected": -2.1177384853363037, + "step": 1194 + }, + { + "epoch": 0.33, + "logps_train/chosen": -213.8994140625, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -216.4423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.789941787719727, + "rewards_train/margins": 0.404296875, + "rewards_train/rejected": -9.194238662719727, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -123.01361083984375, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -124.78013610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25136110186576843, + "rewards_train/margins": 0.6766525208950043, + "rewards_train/rejected": -0.9280136227607727, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -19.317012786865234, + "logps_train/ref_chosen": -0.50390625, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -9.053579330444336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8813107013702393, + "rewards_train/margins": -1.8384527675807476, + "rewards_train/rejected": -0.04285793378949165, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -2.2937355041503906, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -32.54493713378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18312644958496094, + "rewards_train/margins": 2.093870162963867, + "rewards_train/rejected": -1.9107437133789062, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -33.90865707397461, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -20.566646575927734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3783657550811768, + "rewards_train/margins": -0.3842011094093323, + "rewards_train/rejected": -0.9941646456718445, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -83.89733123779297, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -110.91389465332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08973312377929688, + "rewards_train/margins": 0.1516563445329666, + "rewards_train/rejected": -0.2413894683122635, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -267.86102294921875, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -232.66091918945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.386102676391602, + "rewards_train/margins": -0.6200108528137207, + "rewards_train/rejected": -7.766091823577881, + "step": 1195 + }, + { + "epoch": 0.33, + "logps_train/chosen": -43.244178771972656, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -25.287120819091797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3369178771972656, + "rewards_train/margins": -0.42070579528808594, + "rewards_train/rejected": -1.9162120819091797, + "step": 1195 + }, + { + "epoch": 0.33, + "learning_rate": 1.1534941733805467e-06, + "loss": 0.5659, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -26.285789489746094, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -55.683937072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2410789728164673, + "rewards_train/margins": 1.0523148775100708, + "rewards_train/rejected": -2.293393850326538, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -22.897502899169922, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -30.329010009765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2085002660751343, + "rewards_train/margins": -0.2505992650985718, + "rewards_train/rejected": -0.9579010009765625, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -103.5240478515625, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -169.3197784423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2524049282073975, + "rewards_train/margins": 3.4795730113983154, + "rewards_train/rejected": -5.731977939605713, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -21.680339813232422, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -35.7224006652832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.796159029006958, + "rewards_train/margins": 0.6260809898376465, + "rewards_train/rejected": -2.4222400188446045, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -14.193017959594727, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -23.415843963623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1114892959594727, + "rewards_train/margins": 0.850407600402832, + "rewards_train/rejected": -1.9618968963623047, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -6.366664409637451, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -1.953125, + "logps_train/rejected": -4.107300758361816, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3460414409637451, + "rewards_train/margins": -0.13062386214733124, + "rewards_train/rejected": -0.21541757881641388, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -135.3363800048828, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -200.24478149414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3836381435394287, + "rewards_train/margins": 5.090840578079224, + "rewards_train/rejected": -8.474478721618652, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -5.2598490715026855, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -11.116440773010254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4927650988101959, + "rewards_train/margins": 0.9356591701507568, + "rewards_train/rejected": -0.4428940713405609, + "step": 1196 + }, + { + "epoch": 0.33, + "logps_train/chosen": -40.1607666015625, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -28.8028621673584, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.066076636314392, + "rewards_train/margins": -0.08579039573669434, + "rewards_train/rejected": -0.9802862405776978, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -77.95049285888672, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -86.43299865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6950492858886719, + "rewards_train/margins": 1.1482505798339844, + "rewards_train/rejected": -1.8432998657226562, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -129.5741729736328, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -142.09866333007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.307417392730713, + "rewards_train/margins": 1.5024490356445312, + "rewards_train/rejected": -3.809866428375244, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -13.926700592041016, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -17.428956985473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18642006814479828, + "rewards_train/margins": 0.18772564828395844, + "rewards_train/rejected": -0.3741457164287567, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -85.46881103515625, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -134.77496337890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.696881055831909, + "rewards_train/margins": -0.4193847179412842, + "rewards_train/rejected": -3.277496337890625, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -10.768564224243164, + "logps_train/ref_chosen": -0.9140625, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -8.63782024383545, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9854502081871033, + "rewards_train/margins": -0.4154181480407715, + "rewards_train/rejected": -0.5700320601463318, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -34.28926467895508, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -30.53031349182129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.32892644405365, + "rewards_train/margins": 0.8928550481796265, + "rewards_train/rejected": -2.2217814922332764, + "step": 1197 + }, + { + "epoch": 0.33, + "logps_train/chosen": -80.29243469238281, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -79.42021942138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6292434930801392, + "rewards_train/margins": 0.4377785921096802, + "rewards_train/rejected": -2.0670220851898193, + "step": 1197 + }, + { + "epoch": 0.33, + "learning_rate": 1.1508794385442452e-06, + "loss": 0.4715, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -19.427799224853516, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -54.97401428222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9677799344062805, + "rewards_train/margins": -0.12037849426269531, + "rewards_train/rejected": -0.8474014401435852, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -72.92433166503906, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -104.52445220947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5924331545829773, + "rewards_train/margins": 3.360012114048004, + "rewards_train/rejected": -3.9524452686309814, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -50.622589111328125, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -36.288299560546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.02475905418396, + "rewards_train/margins": -0.9771790504455566, + "rewards_train/rejected": -2.0475800037384033, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -8.704376220703125, + "logps_train/ref_chosen": -0.431640625, + "logps_train/ref_rejected": -0.431640625, + "logps_train/rejected": -8.790242195129395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8272735476493835, + "rewards_train/margins": 0.008586645126342773, + "rewards_train/rejected": -0.8358601927757263, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -34.748016357421875, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -58.91527557373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8748016357421875, + "rewards_train/margins": 1.6917259693145752, + "rewards_train/rejected": -2.5665276050567627, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -88.00865936279297, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -37.833621978759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.250865936279297, + "rewards_train/margins": -0.6425037384033203, + "rewards_train/rejected": -1.6083621978759766, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -110.10022735595703, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -159.8812255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4600226879119873, + "rewards_train/margins": 0.4280998706817627, + "rewards_train/rejected": -2.88812255859375, + "step": 1198 + }, + { + "epoch": 0.33, + "logps_train/chosen": -100.1944580078125, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -143.74505615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.11944580078125, + "rewards_train/margins": 4.305059909820557, + "rewards_train/rejected": -6.424505710601807, + "step": 1198 + }, + { + "epoch": 0.34, + "logps_train/chosen": -5.911705493927002, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -11.19990348815918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.30257946252822876, + "rewards_train/margins": 1.0522572994232178, + "rewards_train/rejected": -0.749677836894989, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -130.20077514648438, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -183.86834716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2200775146484375, + "rewards_train/margins": 5.516757488250732, + "rewards_train/rejected": -6.73683500289917, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -170.80587768554688, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -149.90994262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.080587863922119, + "rewards_train/margins": 1.5604066848754883, + "rewards_train/rejected": -4.640994548797607, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -2.5942018032073975, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -4.964578151702881, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0062951804138720036, + "rewards_train/margins": 0.13391263177618384, + "rewards_train/rejected": -0.14020781219005585, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -13.092616081237793, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -18.895923614501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4780116081237793, + "rewards_train/margins": 0.7022057771682739, + "rewards_train/rejected": -1.1802173852920532, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -0.8408071994781494, + "logps_train/ref_chosen": -1.2890625, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -18.286216735839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04482553154230118, + "rewards_train/margins": 1.5031347051262856, + "rewards_train/rejected": -1.4583091735839844, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.560638427734375, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -31.606201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.256063848733902, + "rewards_train/margins": 0.879556268453598, + "rewards_train/rejected": -1.1356201171875, + "step": 1199 + }, + { + "epoch": 0.34, + "logps_train/chosen": -29.883678436279297, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -54.46295928955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36336785554885864, + "rewards_train/margins": 2.795428216457367, + "rewards_train/rejected": -3.1587960720062256, + "step": 1199 + }, + { + "epoch": 0.34, + "learning_rate": 1.1482636477118419e-06, + "loss": 0.416, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -12.281087875366211, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -11.227051734924316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0343588590621948, + "rewards_train/margins": -0.7804036736488342, + "rewards_train/rejected": -0.2539551854133606, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -112.28770446777344, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -185.05870056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7287704944610596, + "rewards_train/margins": 4.727099657058716, + "rewards_train/rejected": -6.455870151519775, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -8.125822067260742, + "logps_train/ref_chosen": -0.86328125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -31.12306022644043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7262541055679321, + "rewards_train/margins": 1.7985519170761108, + "rewards_train/rejected": -2.524806022644043, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -8.077646255493164, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -27.552730560302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24838963150978088, + "rewards_train/margins": 1.3756334483623505, + "rewards_train/rejected": -1.6240230798721313, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -54.54962158203125, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -49.14763259887695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.667462110519409, + "rewards_train/margins": -0.15269875526428223, + "rewards_train/rejected": -2.514763355255127, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -12.7798490524292, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -21.354461669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6998599171638489, + "rewards_train/margins": 0.5293362736701965, + "rewards_train/rejected": -1.2291961908340454, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -10.288126945495605, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -13.267515182495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6256877183914185, + "rewards_train/margins": 0.42293882369995117, + "rewards_train/rejected": -1.0486265420913696, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -200.32676696777344, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -234.41845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.832676887512207, + "rewards_train/margins": 2.309168815612793, + "rewards_train/rejected": -10.141845703125, + "step": 1200 + }, + { + "epoch": 0.34, + "logps_train/chosen": -183.97067260742188, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -141.6786346435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.297067165374756, + "rewards_train/margins": 0.17079639434814453, + "rewards_train/rejected": -4.4678635597229, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -15.029946327209473, + "logps_train/ref_chosen": -2.328125, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -42.476402282714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2701821327209473, + "rewards_train/margins": 0.06495809555053711, + "rewards_train/rejected": -1.3351402282714844, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -102.04985046386719, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -180.99044799804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6049851179122925, + "rewards_train/margins": 1.8940597772598267, + "rewards_train/rejected": -3.499044895172119, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -30.420860290527344, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -51.19259262084961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2295860052108765, + "rewards_train/margins": -1.2603267431259155, + "rewards_train/rejected": 0.030740737915039062, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -97.23880004882812, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -97.99718475341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5738800764083862, + "rewards_train/margins": 0.07583844661712646, + "rewards_train/rejected": -1.6497185230255127, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -132.11367797851562, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -109.20933532714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.36136794090271, + "rewards_train/margins": 0.009565591812133789, + "rewards_train/rejected": -3.3709335327148438, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -190.37225341796875, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -162.3775177001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8372254371643066, + "rewards_train/margins": 1.450526237487793, + "rewards_train/rejected": -5.2877516746521, + "step": 1201 + }, + { + "epoch": 0.34, + "logps_train/chosen": -94.14877319335938, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -105.48845672607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8648773431777954, + "rewards_train/margins": 1.3339682817459106, + "rewards_train/rejected": -3.198845624923706, + "step": 1201 + }, + { + "epoch": 0.34, + "learning_rate": 1.1456468191911e-06, + "loss": 0.5061, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -8.241920471191406, + "logps_train/ref_chosen": -1.5078125, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -31.426090240478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6734108328819275, + "rewards_train/margins": 0.9754481911659241, + "rewards_train/rejected": -1.6488590240478516, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.750915050506592, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -31.383291244506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05615849420428276, + "rewards_train/margins": 0.3194876126945019, + "rewards_train/rejected": -0.2633291184902191, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -9.97075366973877, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -21.137775421142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.728325366973877, + "rewards_train/margins": 0.619827151298523, + "rewards_train/rejected": -1.3481525182724, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -10.873866081237793, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -12.584043502807617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7608241438865662, + "rewards_train/margins": 0.22726774215698242, + "rewards_train/rejected": -0.9880918860435486, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -14.640872955322266, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -22.98862075805664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5515872836112976, + "rewards_train/margins": -0.040225207805633545, + "rewards_train/rejected": -0.5113620758056641, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -162.27529907226562, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -186.4674530029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3275299072265625, + "rewards_train/margins": 0.21921539306640625, + "rewards_train/rejected": -4.546745300292969, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -18.476058959960938, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -31.027332305908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8788558840751648, + "rewards_train/margins": 1.4832523465156555, + "rewards_train/rejected": -2.3621082305908203, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -139.77633666992188, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -305.15380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.477633953094482, + "rewards_train/margins": 10.637747287750244, + "rewards_train/rejected": -15.115381240844727, + "step": 1202 + }, + { + "epoch": 0.34, + "logps_train/chosen": -149.8291778564453, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -215.29864501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3329179286956787, + "rewards_train/margins": 5.946946382522583, + "rewards_train/rejected": -9.279864311218262, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -119.54727172851562, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -158.5230712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0047271251678467, + "rewards_train/margins": 5.447580099105835, + "rewards_train/rejected": -7.452307224273682, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -57.46603775024414, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -134.3201904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.671603798866272, + "rewards_train/margins": 5.385415434837341, + "rewards_train/rejected": -7.057019233703613, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -76.30557250976562, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -228.0, + "logps_train/rejected": -342.7844543457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8555572032928467, + "rewards_train/margins": 8.622888803482056, + "rewards_train/rejected": -11.478446006774902, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -39.485862731933594, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -55.517845153808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5985863208770752, + "rewards_train/margins": -0.5968017578125, + "rewards_train/rejected": -1.0017845630645752, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -13.889829635620117, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -13.764212608337402, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6139829754829407, + "rewards_train/margins": -0.5750617124140263, + "rewards_train/rejected": -0.03892126306891441, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -28.408527374267578, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -42.961456298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45335274934768677, + "rewards_train/margins": 0.36779290437698364, + "rewards_train/rejected": -0.8211456537246704, + "step": 1203 + }, + { + "epoch": 0.34, + "logps_train/chosen": -128.1684112548828, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -117.72659301757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8168411254882812, + "rewards_train/margins": 0.25581836700439453, + "rewards_train/rejected": -4.072659492492676, + "step": 1203 + }, + { + "epoch": 0.34, + "learning_rate": 1.1430289712970448e-06, + "loss": 0.4101, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -89.43402099609375, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -53.78678894042969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.768402099609375, + "rewards_train/margins": -0.16472315788269043, + "rewards_train/rejected": -2.6036789417266846, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -204.85525512695312, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -194.0, + "logps_train/rejected": -257.8025207519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.0855255126953125, + "rewards_train/margins": 0.2947268486022949, + "rewards_train/rejected": -6.380252361297607, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -84.58190155029297, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -98.53529357910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2081901580095291, + "rewards_train/margins": 0.2953392118215561, + "rewards_train/rejected": -0.5035293698310852, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -25.233510971069336, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -8.309288024902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1796010732650757, + "rewards_train/margins": -0.8674222528934479, + "rewards_train/rejected": -0.3121788203716278, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -64.04695129394531, + "logps_train/ref_chosen": -31.625, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -43.84821701049805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2421951293945312, + "rewards_train/margins": -0.4011232852935791, + "rewards_train/rejected": -2.841071844100952, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -152.7249298095703, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -263.0211181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.4724931716918945, + "rewards_train/margins": 4.429618835449219, + "rewards_train/rejected": -9.902112007141113, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -21.85427474975586, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -28.102035522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44792747497558594, + "rewards_train/margins": 0.3997761011123657, + "rewards_train/rejected": -0.8477035760879517, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -27.854036331176758, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -39.18661880493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3854036331176758, + "rewards_train/margins": 0.8082582950592041, + "rewards_train/rejected": -2.19366192817688, + "step": 1204 + }, + { + "epoch": 0.34, + "logps_train/chosen": -46.29601287841797, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -38.63471984863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2921013832092285, + "rewards_train/margins": 0.1401207447052002, + "rewards_train/rejected": -2.4322221279144287, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -121.38514709472656, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -147.0987548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.888514757156372, + "rewards_train/margins": 3.0713608264923096, + "rewards_train/rejected": -5.959875583648682, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -121.93302917480469, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -157.9295654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0433030128479004, + "rewards_train/margins": 3.849653720855713, + "rewards_train/rejected": -5.892956733703613, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -132.13461303710938, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -184.65753173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3634612560272217, + "rewards_train/margins": 5.302292108535767, + "rewards_train/rejected": -8.665753364562988, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -139.10025024414062, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -216.52577209472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0600249767303467, + "rewards_train/margins": 6.492552042007446, + "rewards_train/rejected": -8.552577018737793, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -0.4520949423313141, + "logps_train/ref_chosen": -0.451171875, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -9.929098129272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.230673458660021e-05, + "rewards_train/margins": 0.07406750917289173, + "rewards_train/rejected": -0.07415981590747833, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -249.10195922851562, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -228.125244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.410196304321289, + "rewards_train/margins": 1.802328109741211, + "rewards_train/rejected": -10.2125244140625, + "step": 1205 + }, + { + "epoch": 0.34, + "logps_train/chosen": -179.77301025390625, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -153.14515686035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3773010969161987, + "rewards_train/margins": 4.887214779853821, + "rewards_train/rejected": -6.2645158767700195, + "step": 1205 + }, + { + "epoch": 0.34, + "learning_rate": 1.1404101223518355e-06, + "loss": 0.4018, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -94.6297836303711, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -113.06340789794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5629783868789673, + "rewards_train/margins": 2.743362545967102, + "rewards_train/rejected": -3.3063409328460693, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -30.0277042388916, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -64.4593505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.446520447731018, + "rewards_train/margins": 3.468164801597595, + "rewards_train/rejected": -4.914685249328613, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -46.53721618652344, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -93.28071594238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2212783843278885, + "rewards_train/margins": 0.7493499666452408, + "rewards_train/rejected": -0.5280715823173523, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -137.41299438476562, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -211.57571411132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2912994623184204, + "rewards_train/margins": 6.3662718534469604, + "rewards_train/rejected": -7.657571315765381, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -12.538812637329102, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -1.578125, + "logps_train/rejected": -5.110927581787109, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7070062756538391, + "rewards_train/margins": -0.35372599959373474, + "rewards_train/rejected": -0.35328027606010437, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -166.6791534423828, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -190.91213989257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9679152965545654, + "rewards_train/margins": 0.7232987880706787, + "rewards_train/rejected": -4.691214084625244, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -2.286710023880005, + "logps_train/ref_chosen": -0.55859375, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -15.331621170043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1728116273880005, + "rewards_train/margins": 0.9009754657745361, + "rewards_train/rejected": -1.0737870931625366, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -11.32996940612793, + "logps_train/ref_chosen": -1.4375, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -10.226031303405762, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9892469644546509, + "rewards_train/margins": -0.32133132219314575, + "rewards_train/rejected": -0.6679156422615051, + "step": 1206 + }, + { + "epoch": 0.34, + "logps_train/chosen": -158.39305114746094, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -245.06619262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.139305114746094, + "rewards_train/margins": 2.0673141479492188, + "rewards_train/rejected": -8.206619262695312, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -11.396183013916016, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -26.609725952148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34274330735206604, + "rewards_train/margins": 1.2432293593883514, + "rewards_train/rejected": -1.5859726667404175, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -40.189666748046875, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -99.67394256591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21896667778491974, + "rewards_train/margins": 1.8984275311231613, + "rewards_train/rejected": -2.117394208908081, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -64.51423645019531, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -38.58368682861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.248576357960701, + "rewards_train/margins": 2.4194449931383133, + "rewards_train/rejected": -2.1708686351776123, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -109.48828887939453, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -161.0825653076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.598828911781311, + "rewards_train/margins": 4.359427809715271, + "rewards_train/rejected": -4.958256721496582, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -230.20504760742188, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -179.23550415039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.320504665374756, + "rewards_train/margins": -2.4969539642333984, + "rewards_train/rejected": -4.823550701141357, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -23.35770034790039, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -27.88251304626465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.248270034790039, + "rewards_train/margins": 0.3149813413619995, + "rewards_train/rejected": -1.5632513761520386, + "step": 1207 + }, + { + "epoch": 0.34, + "logps_train/chosen": -66.60983276367188, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -134.72030639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8859832882881165, + "rewards_train/margins": 0.936047375202179, + "rewards_train/rejected": -1.8220306634902954, + "step": 1207 + }, + { + "epoch": 0.34, + "learning_rate": 1.137790290684638e-06, + "loss": 0.4398, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -7.414167404174805, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -10.16125202178955, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.424229234457016, + "rewards_train/margins": 0.11064597964286804, + "rewards_train/rejected": -0.534875214099884, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -115.45974731445312, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -156.4092254638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8459746837615967, + "rewards_train/margins": 2.044947862625122, + "rewards_train/rejected": -5.890922546386719, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -241.72898864746094, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -219.3941192626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.972899436950684, + "rewards_train/margins": 0.21651268005371094, + "rewards_train/rejected": -10.189412117004395, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.291196823120117, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -57.8299560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7634947299957275, + "rewards_train/margins": 1.46950101852417, + "rewards_train/rejected": -3.2329957485198975, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -18.964323043823242, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -21.20957374572754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.64955735206604, + "rewards_train/margins": 0.0760875940322876, + "rewards_train/rejected": -1.7256449460983276, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -138.1097412109375, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -161.65798950195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.81097412109375, + "rewards_train/margins": 1.4048247337341309, + "rewards_train/rejected": -7.215798854827881, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -110.47715759277344, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -186.22032165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7477158308029175, + "rewards_train/margins": 4.074316620826721, + "rewards_train/rejected": -5.822032451629639, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -235.97842407226562, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -336.0269775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.647842407226562, + "rewards_train/margins": 2.854855537414551, + "rewards_train/rejected": -15.502697944641113, + "step": 1208 + }, + { + "epoch": 0.34, + "logps_train/chosen": -143.6220703125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -172.9547882080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.76220703125, + "rewards_train/margins": 3.8332719802856445, + "rewards_train/rejected": -4.5954790115356445, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -12.303690910339355, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -16.83126449584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2491190880537033, + "rewards_train/margins": 1.1246324330568314, + "rewards_train/rejected": -1.3737515211105347, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -136.11077880859375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -231.76345825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18892212212085724, + "rewards_train/margins": 3.665267899632454, + "rewards_train/rejected": -3.4763457775115967, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -54.325660705566406, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -85.35317993164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7924339175224304, + "rewards_train/margins": 0.2777519226074219, + "rewards_train/rejected": 0.5146819949150085, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -26.8392391204834, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -54.89784240722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1182990074157715, + "rewards_train/margins": 0.6339852809906006, + "rewards_train/rejected": -2.752284288406372, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -105.99807739257812, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -219.39013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8498077392578125, + "rewards_train/margins": 4.8892059326171875, + "rewards_train/rejected": -7.739013671875, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -81.76765441894531, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -123.732666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.476765513420105, + "rewards_train/margins": 2.8965011835098267, + "rewards_train/rejected": -4.373266696929932, + "step": 1209 + }, + { + "epoch": 0.34, + "logps_train/chosen": -152.71011352539062, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -249.69345092773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.37101149559021, + "rewards_train/margins": 7.998334169387817, + "rewards_train/rejected": -11.369345664978027, + "step": 1209 + }, + { + "epoch": 0.34, + "learning_rate": 1.135169494631497e-06, + "loss": 0.243, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -18.882129669189453, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -15.902190208435059, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1288379430770874, + "rewards_train/margins": -0.2854939103126526, + "rewards_train/rejected": -0.8433440327644348, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -332.35296630859375, + "logps_train/ref_chosen": -242.0, + "logps_train/ref_rejected": -264.0, + "logps_train/rejected": -368.34942626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.035296440124512, + "rewards_train/margins": 1.3996467590332031, + "rewards_train/rejected": -10.434943199157715, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -131.50755310058594, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -134.71910095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.050755262374878, + "rewards_train/margins": 0.12115478515625, + "rewards_train/rejected": -3.171910047531128, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -151.43231201171875, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -237.08087158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2432312965393066, + "rewards_train/margins": 2.0648560523986816, + "rewards_train/rejected": -5.308087348937988, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -30.052017211914062, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -49.838802337646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.03020179271698, + "rewards_train/margins": 2.1286784410476685, + "rewards_train/rejected": -3.1588802337646484, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -223.82179260253906, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -328.91864013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.882179260253906, + "rewards_train/margins": 5.309684753417969, + "rewards_train/rejected": -13.191864013671875, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -16.29208755493164, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -1.40625, + "logps_train/rejected": -9.945569038391113, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6604587435722351, + "rewards_train/margins": 0.19347316026687622, + "rewards_train/rejected": -0.8539319038391113, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.356508255004883, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -36.28545379638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4106508195400238, + "rewards_train/margins": 0.49289456009864807, + "rewards_train/rejected": -0.9035453796386719, + "step": 1210 + }, + { + "epoch": 0.34, + "logps_train/chosen": -180.01670837402344, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -222.9095001220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7016708850860596, + "rewards_train/margins": 2.7892792224884033, + "rewards_train/rejected": -6.490950107574463, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -26.263296127319336, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -46.0660514831543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0138295888900757, + "rewards_train/margins": 0.992775559425354, + "rewards_train/rejected": -2.0066051483154297, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -206.0189208984375, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -175.00430297851562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.50189208984375, + "rewards_train/margins": -0.7014617919921875, + "rewards_train/rejected": -3.8004302978515625, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -5.03126335144043, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -18.171127319335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2140638381242752, + "rewards_train/margins": 0.12804891169071198, + "rewards_train/rejected": -0.3421127498149872, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -193.5655517578125, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -196.50146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.606555461883545, + "rewards_train/margins": 0.39359140396118164, + "rewards_train/rejected": -8.000146865844727, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -137.52059936523438, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -181.28292846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.402060031890869, + "rewards_train/margins": 3.32623291015625, + "rewards_train/rejected": -5.728292942047119, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -118.20529174804688, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -215.058837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5705292224884033, + "rewards_train/margins": 8.235355138778687, + "rewards_train/rejected": -11.80588436126709, + "step": 1211 + }, + { + "epoch": 0.34, + "logps_train/chosen": -258.959716796875, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -189.893310546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.195971488952637, + "rewards_train/margins": -1.1566405296325684, + "rewards_train/rejected": -7.039330959320068, + "step": 1211 + }, + { + "epoch": 0.34, + "learning_rate": 1.1325477525352056e-06, + "loss": 0.4443, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -8.212218284606934, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -16.22946548461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06497182697057724, + "rewards_train/margins": 0.8235997334122658, + "rewards_train/rejected": -0.888571560382843, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -157.21426391601562, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -259.41473388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5214264392852783, + "rewards_train/margins": 4.620046854019165, + "rewards_train/rejected": -7.141473293304443, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -36.43535232543945, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -45.96367263793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3122851848602295, + "rewards_train/margins": 0.9278321266174316, + "rewards_train/rejected": -3.240117311477661, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -228.408935546875, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -213.37152099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.740893840789795, + "rewards_train/margins": 3.99625825881958, + "rewards_train/rejected": -8.737152099609375, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.8982415199279785, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -37.616390228271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1366991549730301, + "rewards_train/margins": 2.62493996322155, + "rewards_train/rejected": -2.76163911819458, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -118.89773559570312, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -228.14471435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2397735118865967, + "rewards_train/margins": 6.974698305130005, + "rewards_train/rejected": -9.214471817016602, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -94.70669555664062, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -99.90924835205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1706695556640625, + "rewards_train/margins": -0.32974472641944885, + "rewards_train/rejected": 0.15907517075538635, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -123.20892333984375, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -126.5455093383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4208924770355225, + "rewards_train/margins": 1.8336584568023682, + "rewards_train/rejected": -4.254550933837891, + "step": 1212 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.9929351806640625, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -10.131735801696777, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13366852700710297, + "rewards_train/margins": 0.24825505912303925, + "rewards_train/rejected": -0.3819235861301422, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -83.33769989013672, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -82.87689208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5837700366973877, + "rewards_train/margins": 0.37891924381256104, + "rewards_train/rejected": -1.9626892805099487, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -15.972654342651367, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -21.716655731201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1160154342651367, + "rewards_train/margins": 0.011900186538696289, + "rewards_train/rejected": -1.127915620803833, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -104.97431945800781, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -119.41264343261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.072432041168213, + "rewards_train/margins": 0.3188323974609375, + "rewards_train/rejected": -5.39126443862915, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -155.21630859375, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -152.6552734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.121630907058716, + "rewards_train/margins": -0.20610356330871582, + "rewards_train/rejected": -2.91552734375, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -94.98252868652344, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -158.48118591308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8482528924942017, + "rewards_train/margins": 4.199865698814392, + "rewards_train/rejected": -5.048118591308594, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -144.62278747558594, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -125.89419555664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.362278699874878, + "rewards_train/margins": 2.4771411418914795, + "rewards_train/rejected": -4.839419841766357, + "step": 1213 + }, + { + "epoch": 0.34, + "logps_train/chosen": -15.82650375366211, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -4.561558723449707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.582650363445282, + "rewards_train/margins": -0.5577444899827242, + "rewards_train/rejected": -0.024905873462557793, + "step": 1213 + }, + { + "epoch": 0.34, + "learning_rate": 1.129925082745179e-06, + "loss": 0.3784, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -103.31297302246094, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -105.50526428222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3312973082065582, + "rewards_train/margins": -0.030770868062973022, + "rewards_train/rejected": -0.3005264401435852, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -53.86781692504883, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -80.82609558105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8117817640304565, + "rewards_train/margins": 2.895827889442444, + "rewards_train/rejected": -4.7076096534729, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -19.393247604370117, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -10.98906421661377, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6518247723579407, + "rewards_train/margins": 0.17364418506622314, + "rewards_train/rejected": -0.8254689574241638, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -13.068120002746582, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -11.78821086883545, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43181201815605164, + "rewards_train/margins": 0.2845090925693512, + "rewards_train/rejected": -0.7163211107254028, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -85.74095916748047, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -133.10031127929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.124095916748047, + "rewards_train/margins": 5.135935306549072, + "rewards_train/rejected": -7.260031223297119, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -26.353466033935547, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -12.709268569946289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7853466272354126, + "rewards_train/margins": -1.1269197463989258, + "rewards_train/rejected": -0.6584268808364868, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -34.27619171142578, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -28.442476272583008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7276191711425781, + "rewards_train/margins": 0.4166285991668701, + "rewards_train/rejected": -2.1442477703094482, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -100.3630142211914, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -132.35232543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2863014936447144, + "rewards_train/margins": 3.8489309549331665, + "rewards_train/rejected": -5.135232448577881, + "step": 1214 + }, + { + "epoch": 0.34, + "logps_train/chosen": -166.247314453125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -167.03665161132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.524731636047363, + "rewards_train/margins": 0.7789335250854492, + "rewards_train/rejected": -7.3036651611328125, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -77.26680755615234, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -73.37479400634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7516807317733765, + "rewards_train/margins": 0.360798716545105, + "rewards_train/rejected": -2.1124794483184814, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -108.69904327392578, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -103.15390014648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.269904375076294, + "rewards_train/margins": -0.6045143604278564, + "rewards_train/rejected": -0.6653900146484375, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -64.10427856445312, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -83.57500457763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0854278802871704, + "rewards_train/margins": 1.2470725774765015, + "rewards_train/rejected": -2.332500457763672, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -149.7618408203125, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -178.04710388183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.326184034347534, + "rewards_train/margins": 4.128526449203491, + "rewards_train/rejected": -7.454710483551025, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -109.68901824951172, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -92.9229965209961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1189019680023193, + "rewards_train/margins": -0.12660229206085205, + "rewards_train/rejected": -1.9922996759414673, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -44.93785095214844, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -28.30851173400879, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.093785047531128, + "rewards_train/margins": -0.3566838502883911, + "rewards_train/rejected": -1.7371011972427368, + "step": 1215 + }, + { + "epoch": 0.34, + "logps_train/chosen": -89.3277359008789, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -122.15821838378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0327736139297485, + "rewards_train/margins": 3.3330482244491577, + "rewards_train/rejected": -4.365821838378906, + "step": 1215 + }, + { + "epoch": 0.34, + "learning_rate": 1.127301503617325e-06, + "loss": 0.4857, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -151.9754638671875, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -174.67706298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.647546291351318, + "rewards_train/margins": 0.22016000747680664, + "rewards_train/rejected": -4.867706298828125, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -74.1967544555664, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -117.15765380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1303245574235916, + "rewards_train/margins": 3.4460899382829666, + "rewards_train/rejected": -3.315765380859375, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -167.8505096435547, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -218.09349060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1850509643554688, + "rewards_train/margins": 5.624298095703125, + "rewards_train/rejected": -7.809349060058594, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -16.391035079956055, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -26.466121673583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0578535795211792, + "rewards_train/margins": 1.1700085401535034, + "rewards_train/rejected": -2.2278621196746826, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -5.729304313659668, + "logps_train/ref_chosen": -0.65234375, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -12.947351455688477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5076960921287537, + "rewards_train/margins": 0.26828908920288086, + "rewards_train/rejected": -0.7759851813316345, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -57.068599700927734, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -51.340843200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4568599462509155, + "rewards_train/margins": 1.1022244691848755, + "rewards_train/rejected": -2.559084415435791, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -221.88600158691406, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -243.9384307861328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.288600444793701, + "rewards_train/margins": -0.2947573661804199, + "rewards_train/rejected": -6.993843078613281, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -141.75662231445312, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -167.33656311035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.425662517547607, + "rewards_train/margins": -0.5920062065124512, + "rewards_train/rejected": -3.8336563110351562, + "step": 1216 + }, + { + "epoch": 0.34, + "logps_train/chosen": -44.449859619140625, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -112.23463439941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3449859619140625, + "rewards_train/margins": 2.5534777641296387, + "rewards_train/rejected": -4.898463726043701, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -218.8507080078125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -215.03448486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.38507080078125, + "rewards_train/margins": 0.01837778091430664, + "rewards_train/rejected": -7.403448581695557, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -153.02554321289062, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -155.8266143798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4025542736053467, + "rewards_train/margins": -0.6198928356170654, + "rewards_train/rejected": -1.7826614379882812, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -23.571971893310547, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -35.20441436767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6696972250938416, + "rewards_train/margins": 1.1257442831993103, + "rewards_train/rejected": -1.7954415082931519, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -29.523147583007812, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -56.692909240722656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6148147583007812, + "rewards_train/margins": -0.07052385807037354, + "rewards_train/rejected": -1.5442909002304077, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -87.96923828125, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -138.05299377441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14692382514476776, + "rewards_train/margins": 7.283375456929207, + "rewards_train/rejected": -7.430299282073975, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -142.1820068359375, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -160.0536651611328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8182008266448975, + "rewards_train/margins": -0.41283416748046875, + "rewards_train/rejected": -2.4053666591644287, + "step": 1217 + }, + { + "epoch": 0.34, + "logps_train/chosen": -122.58084106445312, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -247.26246643066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.608084201812744, + "rewards_train/margins": 10.218163013458252, + "rewards_train/rejected": -13.826247215270996, + "step": 1217 + }, + { + "epoch": 0.34, + "learning_rate": 1.124677033513916e-06, + "loss": 0.4608, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -212.26040649414062, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -222.56846618652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.276041030883789, + "rewards_train/margins": 1.4308061599731445, + "rewards_train/rejected": -10.706847190856934, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -15.203641891479492, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -19.966449737548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.060989260673523, + "rewards_train/margins": 0.6497182846069336, + "rewards_train/rejected": -1.7107075452804565, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -7.62562370300293, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -22.12759017944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4578748643398285, + "rewards_train/margins": 0.0548841655254364, + "rewards_train/rejected": -0.5127590298652649, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -30.28036117553711, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -39.37409210205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1280361413955688, + "rewards_train/margins": 0.7593730688095093, + "rewards_train/rejected": -1.8874092102050781, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -31.473539352416992, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -18.36917495727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4973539412021637, + "rewards_train/margins": 0.6645636260509491, + "rewards_train/rejected": -1.1619175672531128, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -42.88203048706055, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -63.40672302246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2132030725479126, + "rewards_train/margins": 2.8774694204330444, + "rewards_train/rejected": -4.090672492980957, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -184.22593688964844, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -152.96311950683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.022593975067139, + "rewards_train/margins": 0.7737178802490234, + "rewards_train/rejected": -5.796311855316162, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -148.98858642578125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -148.99099731445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6488587856292725, + "rewards_train/margins": 0.0002410411834716797, + "rewards_train/rejected": -3.649099826812744, + "step": 1218 + }, + { + "epoch": 0.34, + "logps_train/chosen": -148.52841186523438, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -95.03565979003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.402841091156006, + "rewards_train/margins": -2.0992751121520996, + "rewards_train/rejected": -3.3035659790039062, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -136.95703125, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -178.48440551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.695703029632568, + "rewards_train/margins": 2.1027374267578125, + "rewards_train/rejected": -6.798440456390381, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -17.881877899169922, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -55.91863250732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8131877779960632, + "rewards_train/margins": 0.7786754965782166, + "rewards_train/rejected": -1.5918632745742798, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -96.1390380859375, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -198.23301696777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.56390380859375, + "rewards_train/margins": 5.8093976974487305, + "rewards_train/rejected": -8.37330150604248, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -103.30511474609375, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -242.52890014648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5805115699768066, + "rewards_train/margins": 8.17237901687622, + "rewards_train/rejected": -10.752890586853027, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -123.37281036376953, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -208.71542358398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9372811317443848, + "rewards_train/margins": 6.134261608123779, + "rewards_train/rejected": -9.071542739868164, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.089309692382812, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -15.974408149719238, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.071431040763855, + "rewards_train/margins": -0.34899020195007324, + "rewards_train/rejected": -0.7224408388137817, + "step": 1219 + }, + { + "epoch": 0.34, + "logps_train/chosen": -15.659873962402344, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -2.5625, + "logps_train/rejected": -23.8267765045166, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.409737467765808, + "rewards_train/margins": 0.716690182685852, + "rewards_train/rejected": -2.12642765045166, + "step": 1219 + }, + { + "epoch": 0.34, + "learning_rate": 1.12205169080346e-06, + "loss": 0.4512, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -24.95322036743164, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -24.7247314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.432822048664093, + "rewards_train/margins": 0.633401095867157, + "rewards_train/rejected": -1.06622314453125, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -8.893392562866211, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -16.697439193725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3924642503261566, + "rewards_train/margins": 0.05227968096733093, + "rewards_train/rejected": -0.44474393129348755, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -198.12515258789062, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -221.81991577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.512515544891357, + "rewards_train/margins": 0.6694760322570801, + "rewards_train/rejected": -7.1819915771484375, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -59.13875961303711, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -78.9211196899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3138759732246399, + "rewards_train/margins": 0.4282360076904297, + "rewards_train/rejected": -0.7421119809150696, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -3.8071818351745605, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -4.596378326416016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21196818351745605, + "rewards_train/margins": -0.036705344915390015, + "rewards_train/rejected": -0.17526283860206604, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -23.784543991088867, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -40.340511322021484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7097043991088867, + "rewards_train/margins": 1.5555968284606934, + "rewards_train/rejected": -3.26530122756958, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -74.82630157470703, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -73.87638854980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.932630181312561, + "rewards_train/margins": -0.09499132633209229, + "rewards_train/rejected": -0.8376388549804688, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -208.50875854492188, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -243.33816528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.050876140594482, + "rewards_train/margins": 1.4829401969909668, + "rewards_train/rejected": -8.53381633758545, + "step": 1220 + }, + { + "epoch": 0.34, + "logps_train/chosen": -117.29133605957031, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -171.4447784423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3791335821151733, + "rewards_train/margins": 1.565344214439392, + "rewards_train/rejected": -2.9444777965545654, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -109.35749816894531, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -192.90257263183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.585749864578247, + "rewards_train/margins": 3.6045076847076416, + "rewards_train/rejected": -5.190257549285889, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -142.8243865966797, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -264.1575927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.432438611984253, + "rewards_train/margins": 6.883321046829224, + "rewards_train/rejected": -10.315759658813477, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -5.517968654632568, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -26.50835609436035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25492188334465027, + "rewards_train/margins": 0.40841373801231384, + "rewards_train/rejected": -0.6633356213569641, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -194.2044677734375, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -179.002685546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.720446586608887, + "rewards_train/margins": -1.920177936553955, + "rewards_train/rejected": -6.800268650054932, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -155.22610473632812, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -224.4229736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.622610569000244, + "rewards_train/margins": 7.119686603546143, + "rewards_train/rejected": -9.742297172546387, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -125.39785766601562, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -159.91954040527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5397857427597046, + "rewards_train/margins": 4.452168583869934, + "rewards_train/rejected": -5.991954326629639, + "step": 1221 + }, + { + "epoch": 0.34, + "logps_train/chosen": -141.5860595703125, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -174.82284545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.458606004714966, + "rewards_train/margins": 2.87367844581604, + "rewards_train/rejected": -6.332284450531006, + "step": 1221 + }, + { + "epoch": 0.34, + "learning_rate": 1.1194254938605728e-06, + "loss": 0.4193, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -16.78424072265625, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -38.201934814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6284241080284119, + "rewards_train/margins": 1.7292693257331848, + "rewards_train/rejected": -2.3576934337615967, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -183.1754608154297, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -252.002685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5175461769104, + "rewards_train/margins": 2.2827224731445312, + "rewards_train/rejected": -6.800268650054932, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -86.48594665527344, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -231.97531127929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7985946536064148, + "rewards_train/margins": 8.898937046527863, + "rewards_train/rejected": -9.697531700134277, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -162.65892028808594, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -295.5310974121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.515892028808594, + "rewards_train/margins": 7.937217712402344, + "rewards_train/rejected": -12.453109741210938, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -147.1260223388672, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -137.32037353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.712602376937866, + "rewards_train/margins": 2.319434881210327, + "rewards_train/rejected": -5.032037258148193, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -5.240720272064209, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -1.3515625, + "logps_train/rejected": -9.53738784790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2365720272064209, + "rewards_train/margins": 0.5820105075836182, + "rewards_train/rejected": -0.8185825347900391, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -145.44622802734375, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -150.38299560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.844622850418091, + "rewards_train/margins": 1.6936767101287842, + "rewards_train/rejected": -4.538299560546875, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -197.470703125, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -254.84457397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.2470703125, + "rewards_train/margins": 4.8373870849609375, + "rewards_train/rejected": -13.084457397460938, + "step": 1222 + }, + { + "epoch": 0.34, + "logps_train/chosen": -172.68112182617188, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -209.97677612304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.468112468719482, + "rewards_train/margins": 0.7295651435852051, + "rewards_train/rejected": -5.1976776123046875, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -3.7566537857055664, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -14.325892448425293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0006653785821981728, + "rewards_train/margins": 0.8850488543394022, + "rewards_train/rejected": -0.8857142329216003, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -115.314453125, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -97.89362335205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0314453840255737, + "rewards_train/margins": -0.9420830458402634, + "rewards_train/rejected": -0.08936233818531036, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -200.64181518554688, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -216.74676513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.464181661605835, + "rewards_train/margins": 6.310494661331177, + "rewards_train/rejected": -9.774676322937012, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -18.14518928527832, + "logps_train/ref_chosen": -4.0, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -40.5362434387207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.41451895236969, + "rewards_train/margins": -0.6608945727348328, + "rewards_train/rejected": -0.7536243796348572, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -26.734155654907227, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -68.8974838256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3484155833721161, + "rewards_train/margins": 4.578832894563675, + "rewards_train/rejected": -4.927248477935791, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -29.716045379638672, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -49.88082504272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7216045260429382, + "rewards_train/margins": 0.6914779543876648, + "rewards_train/rejected": -1.413082480430603, + "step": 1223 + }, + { + "epoch": 0.34, + "logps_train/chosen": -62.682373046875, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -223.24951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09323730319738388, + "rewards_train/margins": 12.03171405941248, + "rewards_train/rejected": -12.124951362609863, + "step": 1223 + }, + { + "epoch": 0.34, + "learning_rate": 1.1167984610658484e-06, + "loss": 0.2801, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -146.88247680664062, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -195.1576690673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1882476806640625, + "rewards_train/margins": 6.727519512176514, + "rewards_train/rejected": -7.915767192840576, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -23.221546173095703, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -24.861757278442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9721546173095703, + "rewards_train/margins": -0.3234788775444031, + "rewards_train/rejected": -0.6486757397651672, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -167.76220703125, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -218.04061889648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.226220607757568, + "rewards_train/margins": 3.6778416633605957, + "rewards_train/rejected": -8.904062271118164, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -78.71039581298828, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -189.62405395507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4210395812988281, + "rewards_train/margins": -0.4586341865360737, + "rewards_train/rejected": 0.03759460523724556, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -141.5597686767578, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -204.0, + "logps_train/rejected": -260.59307861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.955976963043213, + "rewards_train/margins": 2.7033309936523438, + "rewards_train/rejected": -5.659307956695557, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -13.592754364013672, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -50.867279052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9061504602432251, + "rewards_train/margins": 1.6680775880813599, + "rewards_train/rejected": -2.574228048324585, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -198.8466796875, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -273.637939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8846681118011475, + "rewards_train/margins": 4.179126024246216, + "rewards_train/rejected": -7.063794136047363, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -43.31142807006836, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -26.306957244873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6561428308486938, + "rewards_train/margins": 0.37455296516418457, + "rewards_train/rejected": -1.0306957960128784, + "step": 1224 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.94175386428833, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -35.563140869140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41761288046836853, + "rewards_train/margins": 2.619951158761978, + "rewards_train/rejected": -3.0375640392303467, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -40.658782958984375, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -69.58462524414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.640878438949585, + "rewards_train/margins": -0.40741586685180664, + "rewards_train/rejected": -2.2334625720977783, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -119.88821411132812, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -40.579505920410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6888214349746704, + "rewards_train/margins": 1.594129204750061, + "rewards_train/rejected": -2.2829506397247314, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -196.4351806640625, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -175.77691650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9435181617736816, + "rewards_train/margins": 2.434173583984375, + "rewards_train/rejected": -5.377691745758057, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.527183532714844, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -35.0433349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8855308294296265, + "rewards_train/margins": 1.0063027143478394, + "rewards_train/rejected": -2.891833543777466, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.243860244750977, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -27.891334533691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2243860960006714, + "rewards_train/margins": 0.24599742889404297, + "rewards_train/rejected": -1.4703835248947144, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -59.13285446166992, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -79.67991638183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.338285446166992, + "rewards_train/margins": 1.7297062873840332, + "rewards_train/rejected": -4.067991733551025, + "step": 1225 + }, + { + "epoch": 0.34, + "logps_train/chosen": -66.44906616210938, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -229.548583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3949066400527954, + "rewards_train/margins": 6.759951949119568, + "rewards_train/rejected": -8.154858589172363, + "step": 1225 + }, + { + "epoch": 0.34, + "learning_rate": 1.114170610805731e-06, + "loss": 0.3083, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -12.830096244812012, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -37.33070755004883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20800963044166565, + "rewards_train/margins": 1.9250610768795013, + "rewards_train/rejected": -2.133070707321167, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -104.97547912597656, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -268.5535583496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8475479483604431, + "rewards_train/margins": 7.807808458805084, + "rewards_train/rejected": -8.655356407165527, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -11.111831665039062, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -44.865116119384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03618316724896431, + "rewards_train/margins": 1.9628284685313702, + "rewards_train/rejected": -1.9990116357803345, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -11.409186363220215, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -43.43019104003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10966863483190536, + "rewards_train/margins": 2.120850421488285, + "rewards_train/rejected": -2.2305190563201904, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -126.03616333007812, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -199.28504943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5036163330078125, + "rewards_train/margins": 4.524888515472412, + "rewards_train/rejected": -5.028504848480225, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -48.54666519165039, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -91.03642272949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.354666531085968, + "rewards_train/margins": 0.5489757657051086, + "rewards_train/rejected": -0.9036422967910767, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -10.903194427490234, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -33.9700927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44344446063041687, + "rewards_train/margins": 1.1160648167133331, + "rewards_train/rejected": -1.55950927734375, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -203.85772705078125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -243.63270568847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.685772895812988, + "rewards_train/margins": 3.877497673034668, + "rewards_train/rejected": -10.563270568847656, + "step": 1226 + }, + { + "epoch": 0.34, + "logps_train/chosen": -122.43049621582031, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -129.19412231445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5930496454238892, + "rewards_train/margins": 0.2763625979423523, + "rewards_train/rejected": -0.8694122433662415, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -147.32151794433594, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -200.66294860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8321517705917358, + "rewards_train/margins": 2.4341431856155396, + "rewards_train/rejected": -4.266294956207275, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -142.32252502441406, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -161.826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5822525024414062, + "rewards_train/margins": 0.30036473274230957, + "rewards_train/rejected": -2.882617235183716, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -76.94854736328125, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -62.446617126464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5948547720909119, + "rewards_train/margins": 0.7498069405555725, + "rewards_train/rejected": -1.3446617126464844, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -11.635894775390625, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -7.31765079498291, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17608948051929474, + "rewards_train/margins": 0.21973811089992523, + "rewards_train/rejected": -0.39582759141921997, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -178.6438446044922, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -167.31495666503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.314384460449219, + "rewards_train/margins": 1.2171111106872559, + "rewards_train/rejected": -6.531495571136475, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -103.56475067138672, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -114.95832824707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.456475019454956, + "rewards_train/margins": 1.5893580913543701, + "rewards_train/rejected": -4.045833110809326, + "step": 1227 + }, + { + "epoch": 0.34, + "logps_train/chosen": -122.40467834472656, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -280.2532958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2904679775238037, + "rewards_train/margins": 10.53486180305481, + "rewards_train/rejected": -13.825329780578613, + "step": 1227 + }, + { + "epoch": 0.34, + "learning_rate": 1.1115419614723862e-06, + "loss": 0.236, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -21.329532623291016, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -7.697954177856445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7298282384872437, + "rewards_train/margins": -1.4412828087806702, + "rewards_train/rejected": -0.2885454297065735, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -127.49425506591797, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -54.764801025390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.44942551851272583, + "rewards_train/margins": -0.5229454189538956, + "rewards_train/rejected": 0.07351990044116974, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -236.5436553955078, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -234.498779296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.154365539550781, + "rewards_train/margins": -0.8044872283935547, + "rewards_train/rejected": -8.349878311157227, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -19.63555335998535, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -28.44226837158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3010553121566772, + "rewards_train/margins": 1.1447340250015259, + "rewards_train/rejected": -2.445789337158203, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -28.3736572265625, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -35.05877685546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2779908180236816, + "rewards_train/margins": -1.3721131086349487, + "rewards_train/rejected": -0.9058777093887329, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -21.219207763671875, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -18.140975952148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1906708478927612, + "rewards_train/margins": -0.28907322883605957, + "rewards_train/rejected": -0.9015976190567017, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -5.190880298614502, + "logps_train/ref_chosen": -0.73046875, + "logps_train/ref_rejected": -0.73046875, + "logps_train/rejected": -5.236469745635986, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44604116678237915, + "rewards_train/margins": 0.004558950662612915, + "rewards_train/rejected": -0.45060011744499207, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -389.63250732421875, + "logps_train/ref_chosen": -322.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -179.63002014160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.763250827789307, + "rewards_train/margins": -1.1502485275268555, + "rewards_train/rejected": -5.613002300262451, + "step": 1228 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.46605110168457, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -20.956382751464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26223012804985046, + "rewards_train/margins": 1.2927831709384918, + "rewards_train/rejected": -1.5550132989883423, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -126.257080078125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -255.0947723388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.375707983970642, + "rewards_train/margins": 8.33376944065094, + "rewards_train/rejected": -9.709477424621582, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -80.60725402832031, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -99.04029083251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6392745971679688, + "rewards_train/margins": 1.3433036804199219, + "rewards_train/rejected": -0.7040290832519531, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -14.800048828125, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -34.635711669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9487549066543579, + "rewards_train/margins": 1.3523162603378296, + "rewards_train/rejected": -2.3010711669921875, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -14.701764106750488, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -35.291221618652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.162363886833191, + "rewards_train/margins": 1.310508370399475, + "rewards_train/rejected": -2.472872257232666, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -24.94456672668457, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -29.229068756103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9413317441940308, + "rewards_train/margins": 0.14407527446746826, + "rewards_train/rejected": -2.085407018661499, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -176.03152465820312, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -147.30279541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.303152561187744, + "rewards_train/margins": 2.427126884460449, + "rewards_train/rejected": -4.730279445648193, + "step": 1229 + }, + { + "epoch": 0.34, + "logps_train/chosen": -18.563323974609375, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -8.823580741882324, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4985198974609375, + "rewards_train/margins": -0.9052243232727051, + "rewards_train/rejected": -0.5932955741882324, + "step": 1229 + }, + { + "epoch": 0.34, + "learning_rate": 1.1089125314635726e-06, + "loss": 0.7242, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -147.02731323242188, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -121.54290771484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8027312755584717, + "rewards_train/margins": -0.09844040870666504, + "rewards_train/rejected": -3.7042908668518066, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -98.26803588867188, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -130.9468994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1268036365509033, + "rewards_train/margins": 1.8178863525390625, + "rewards_train/rejected": -3.944689989089966, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -31.301027297973633, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -99.01129150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8738527297973633, + "rewards_train/margins": 1.7272765636444092, + "rewards_train/rejected": -3.6011292934417725, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -218.22926330566406, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -59.956451416015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.422926425933838, + "rewards_train/margins": -2.2522811889648438, + "rewards_train/rejected": -2.170645236968994, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -7.53544807434082, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -24.113584518432617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5410448312759399, + "rewards_train/margins": 0.7203136682510376, + "rewards_train/rejected": -1.2613584995269775, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -51.79492950439453, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -32.628562927246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8544930219650269, + "rewards_train/margins": -0.7041367292404175, + "rewards_train/rejected": -1.1503562927246094, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -23.053985595703125, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -43.822383880615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5553985834121704, + "rewards_train/margins": 1.7393399477005005, + "rewards_train/rejected": -3.294738531112671, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -129.66925048828125, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -182.0870361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.566925048828125, + "rewards_train/margins": 2.4917783737182617, + "rewards_train/rejected": -8.058703422546387, + "step": 1230 + }, + { + "epoch": 0.34, + "logps_train/chosen": -200.40704345703125, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -161.13845825195312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.090704917907715, + "rewards_train/margins": -1.7268590927124023, + "rewards_train/rejected": -6.3638458251953125, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -108.5805892944336, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -171.8702392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8080589771270752, + "rewards_train/margins": 6.828964948654175, + "rewards_train/rejected": -8.63702392578125, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -104.59956359863281, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -109.35672760009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.459956407546997, + "rewards_train/margins": 1.5257163047790527, + "rewards_train/rejected": -3.98567271232605, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -169.5846710205078, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -152.78555297851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1584670543670654, + "rewards_train/margins": 0.4200882911682129, + "rewards_train/rejected": -3.5785553455352783, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -3.464698314666748, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -12.76420783996582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.034780170768499374, + "rewards_train/margins": 0.5049509666860104, + "rewards_train/rejected": -0.470170795917511, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -212.86920166015625, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -221.00059509277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.88692045211792, + "rewards_train/margins": 1.013139247894287, + "rewards_train/rejected": -5.900059700012207, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -29.531593322753906, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -37.35425567626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5094093084335327, + "rewards_train/margins": 1.4447664022445679, + "rewards_train/rejected": -2.9541757106781006, + "step": 1231 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.702772617340088, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -1.6640625, + "logps_train/rejected": -5.394521713256836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.38902726769447327, + "rewards_train/margins": -0.015981346368789673, + "rewards_train/rejected": -0.3730459213256836, + "step": 1231 + }, + { + "epoch": 0.34, + "learning_rate": 1.106282339182512e-06, + "loss": 0.5888, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -68.99583435058594, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -114.7169418334961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7995834350585938, + "rewards_train/margins": 1.6221108436584473, + "rewards_train/rejected": -2.421694278717041, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -6.0237932205200195, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -33.68759536743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07112932205200195, + "rewards_train/margins": 1.12263023853302, + "rewards_train/rejected": -1.193759560585022, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -210.05291748046875, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -216.42440795898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.955291748046875, + "rewards_train/margins": 0.1871490478515625, + "rewards_train/rejected": -9.142440795898438, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -98.21022033691406, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -158.16165161132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22102203965187073, + "rewards_train/margins": 1.3951431214809418, + "rewards_train/rejected": -1.6161651611328125, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -280.49383544921875, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -100.72615814208984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.649383544921875, + "rewards_train/margins": -8.876767635345459, + "rewards_train/rejected": -2.772615909576416, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -129.3945770263672, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -224.92982482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5394577980041504, + "rewards_train/margins": 6.453525066375732, + "rewards_train/rejected": -8.992982864379883, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -21.31988525390625, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -29.338274002075195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.925738513469696, + "rewards_train/margins": 1.2674639821052551, + "rewards_train/rejected": -2.193202495574951, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -7.18525505065918, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -42.69708251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.556025505065918, + "rewards_train/margins": 0.9636827707290649, + "rewards_train/rejected": -1.519708275794983, + "step": 1232 + }, + { + "epoch": 0.34, + "logps_train/chosen": -114.91596984863281, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -188.1310577392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4915969967842102, + "rewards_train/margins": 4.521508872509003, + "rewards_train/rejected": -5.013105869293213, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -36.56425857543945, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -44.968509674072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45642587542533875, + "rewards_train/margins": 1.4529251158237457, + "rewards_train/rejected": -1.9093509912490845, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -116.2149658203125, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -217.0, + "logps_train/rejected": -244.82444763183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22850342094898224, + "rewards_train/margins": 3.01094813644886, + "rewards_train/rejected": -2.782444715499878, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -19.519237518310547, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -79.82235717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2925487756729126, + "rewards_train/margins": 2.564686894416809, + "rewards_train/rejected": -3.8572356700897217, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -22.57937240600586, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -60.11573028564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6766872406005859, + "rewards_train/margins": 2.159885883331299, + "rewards_train/rejected": -2.8365731239318848, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -16.850360870361328, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -31.131465911865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0319111347198486, + "rewards_train/margins": 0.6437355279922485, + "rewards_train/rejected": -1.6756466627120972, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -181.97276306152344, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -244.63491821289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.797276258468628, + "rewards_train/margins": 3.7662155628204346, + "rewards_train/rejected": -7.5634918212890625, + "step": 1233 + }, + { + "epoch": 0.34, + "logps_train/chosen": -173.75985717773438, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -143.10464477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3759857416152954, + "rewards_train/margins": 2.3844786882400513, + "rewards_train/rejected": -3.7604644298553467, + "step": 1233 + }, + { + "epoch": 0.34, + "learning_rate": 1.1036514030377621e-06, + "loss": 0.7328, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -53.1306037902832, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -54.92709732055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2619396150112152, + "rewards_train/margins": 2.5671494901180267, + "rewards_train/rejected": -2.3052098751068115, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -4.589502334594727, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -5.487026691436768, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030825233086943626, + "rewards_train/margins": 0.08975243754684925, + "rewards_train/rejected": -0.12057767063379288, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -75.09468841552734, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -79.63805389404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20946884155273438, + "rewards_train/margins": 2.02933669090271, + "rewards_train/rejected": -2.2388055324554443, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -105.0340576171875, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -263.76019287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.55340576171875, + "rewards_train/margins": 8.322613716125488, + "rewards_train/rejected": -9.876019477844238, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -218.0910186767578, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -147.5785369873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.709102153778076, + "rewards_train/margins": -0.4012484550476074, + "rewards_train/rejected": -5.307853698730469, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -114.50884246826172, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -142.21902465820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2508842945098877, + "rewards_train/margins": 1.9710180759429932, + "rewards_train/rejected": -5.221902370452881, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -20.258544921875, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -29.97270393371582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07414551079273224, + "rewards_train/margins": 0.48391591012477875, + "rewards_train/rejected": -0.4097703993320465, + "step": 1234 + }, + { + "epoch": 0.34, + "logps_train/chosen": -159.79408264160156, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -141.30259704589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.5294084548950195, + "rewards_train/margins": -0.7991485595703125, + "rewards_train/rejected": -6.730259895324707, + "step": 1234 + }, + { + "epoch": 0.35, + "logps_train/chosen": -114.8751449584961, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -214.126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.937514543533325, + "rewards_train/margins": 8.025180578231812, + "rewards_train/rejected": -10.962695121765137, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -126.15589904785156, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -218.0234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0655899047851562, + "rewards_train/margins": 8.43675422668457, + "rewards_train/rejected": -10.502344131469727, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -207.064697265625, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -193.0, + "logps_train/rejected": -240.55528259277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.306469678878784, + "rewards_train/margins": 2.449058771133423, + "rewards_train/rejected": -4.755528450012207, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -15.765935897827148, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -30.298185348510742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4515936076641083, + "rewards_train/margins": 1.828224927186966, + "rewards_train/rejected": -2.279818534851074, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -166.05006408691406, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -181.03445434570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.655006408691406, + "rewards_train/margins": 0.5484390258789062, + "rewards_train/rejected": -7.2034454345703125, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -106.26274108886719, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -115.64846801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.426274061203003, + "rewards_train/margins": 0.5385727882385254, + "rewards_train/rejected": -3.9648468494415283, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -132.3372344970703, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -181.4851837158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.633723497390747, + "rewards_train/margins": 1.5147950649261475, + "rewards_train/rejected": -4.1485185623168945, + "step": 1235 + }, + { + "epoch": 0.35, + "logps_train/chosen": -65.84124755859375, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -46.3817253112793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.809124708175659, + "rewards_train/margins": 0.3977978229522705, + "rewards_train/rejected": -3.2069225311279297, + "step": 1235 + }, + { + "epoch": 0.35, + "learning_rate": 1.1010197414430866e-06, + "loss": 0.3375, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.6693382263183594, + "logps_train/ref_chosen": -0.74609375, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -13.339082717895508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2923244535923004, + "rewards_train/margins": -0.033416181802749634, + "rewards_train/rejected": -0.2589082717895508, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -88.96805572509766, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -88.92012023925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5468056201934814, + "rewards_train/margins": -0.004793524742126465, + "rewards_train/rejected": -1.542012095451355, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -196.48910522460938, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -200.1090087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.948910713195801, + "rewards_train/margins": 3.4619903564453125, + "rewards_train/rejected": -11.410901069641113, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -100.54774475097656, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -58.317508697509766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.854774475097656, + "rewards_train/margins": -0.38552331924438477, + "rewards_train/rejected": -4.4692511558532715, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -37.6272087097168, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -46.554229736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9877208471298218, + "rewards_train/margins": 0.9677022695541382, + "rewards_train/rejected": -2.95542311668396, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -67.82091522216797, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -133.84622192382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4570915400981903, + "rewards_train/margins": 4.827530652284622, + "rewards_train/rejected": -5.2846221923828125, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -66.23380279541016, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -70.79642486572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8733803033828735, + "rewards_train/margins": 1.331262230873108, + "rewards_train/rejected": -3.2046425342559814, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -149.35365295410156, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -149.95018005371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3353652954101562, + "rewards_train/margins": -0.3403472900390625, + "rewards_train/rejected": -2.9950180053710938, + "step": 1236 + }, + { + "epoch": 0.35, + "logps_train/chosen": -138.37918090820312, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -176.72921752929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.687918186187744, + "rewards_train/margins": 1.3850035667419434, + "rewards_train/rejected": -4.0729217529296875, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -34.31353759765625, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -32.25346374511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.925103783607483, + "rewards_train/margins": 0.56586754322052, + "rewards_train/rejected": -2.490971326828003, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -151.04257202148438, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -164.63624572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7042572498321533, + "rewards_train/margins": 2.1093673706054688, + "rewards_train/rejected": -3.813624620437622, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -14.516063690185547, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -16.945068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6328563690185547, + "rewards_train/margins": 0.2772755026817322, + "rewards_train/rejected": -0.9101318717002869, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -199.28057861328125, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -214.458251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.728057861328125, + "rewards_train/margins": 1.617767333984375, + "rewards_train/rejected": -7.3458251953125, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -14.952791213989258, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -33.1110725402832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1062166690826416, + "rewards_train/margins": 0.9548907279968262, + "rewards_train/rejected": -2.0611073970794678, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -221.50711059570312, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -213.71298217773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.250710964202881, + "rewards_train/margins": 2.52058744430542, + "rewards_train/rejected": -7.771298408508301, + "step": 1237 + }, + { + "epoch": 0.35, + "logps_train/chosen": -75.95718383789062, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -46.49475860595703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8207184076309204, + "rewards_train/margins": -0.021242499351501465, + "rewards_train/rejected": -1.799475908279419, + "step": 1237 + }, + { + "epoch": 0.35, + "learning_rate": 1.098387372817326e-06, + "loss": 0.4009, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -37.84549331665039, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -47.45121765136719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6720494031906128, + "rewards_train/margins": 2.582447648048401, + "rewards_train/rejected": -4.254497051239014, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -123.810546875, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -183.17095947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4310548305511475, + "rewards_train/margins": 3.1860411167144775, + "rewards_train/rejected": -5.617095947265625, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -68.64735412597656, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -56.0521125793457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7897354364395142, + "rewards_train/margins": -0.009524106979370117, + "rewards_train/rejected": -1.780211329460144, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -1.6544266939163208, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -12.559538841247559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03184891864657402, + "rewards_train/margins": 0.6897299773991108, + "rewards_train/rejected": -0.7215788960456848, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -64.49310302734375, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -60.79307556152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.10068970173597336, + "rewards_train/margins": -0.09500274807214737, + "rewards_train/rejected": 0.19569244980812073, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -5.487658500671387, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -1.921875, + "logps_train/rejected": -7.9105987548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17064085602760315, + "rewards_train/margins": 0.42823150753974915, + "rewards_train/rejected": -0.5988723635673523, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -89.83740234375, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -144.89227294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.983740210533142, + "rewards_train/margins": 4.105486989021301, + "rewards_train/rejected": -6.089227199554443, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -42.27592468261719, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -109.83271789550781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8900924921035767, + "rewards_train/margins": -1.0568206906318665, + "rewards_train/rejected": -0.8332718014717102, + "step": 1238 + }, + { + "epoch": 0.35, + "logps_train/chosen": -25.692201614379883, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -2.09375, + "logps_train/rejected": -41.0982551574707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0004701614379883, + "rewards_train/margins": 2.899980306625366, + "rewards_train/rejected": -3.9004504680633545, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -114.30354309082031, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -182.5926055908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.930354356765747, + "rewards_train/margins": 3.5289061069488525, + "rewards_train/rejected": -5.4592604637146, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -75.66017150878906, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -43.13661193847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6160171627998352, + "rewards_train/margins": 1.847644031047821, + "rewards_train/rejected": -2.4636611938476562, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -31.9713077545166, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -34.08930206298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7096308469772339, + "rewards_train/margins": 1.1055494546890259, + "rewards_train/rejected": -2.8151803016662598, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -12.543183326721191, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -15.276920318603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010568332858383656, + "rewards_train/margins": 0.6671237228438258, + "rewards_train/rejected": -0.6776920557022095, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -4.808769702911377, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -12.323005676269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1402519792318344, + "rewards_train/margins": 0.46704860031604767, + "rewards_train/rejected": -0.6073005795478821, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -27.444095611572266, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -27.491579055786133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9944095611572266, + "rewards_train/margins": 1.210998296737671, + "rewards_train/rejected": -2.2054078578948975, + "step": 1239 + }, + { + "epoch": 0.35, + "logps_train/chosen": -20.776409149169922, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -11.021310806274414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9588909149169922, + "rewards_train/margins": -0.3911347985267639, + "rewards_train/rejected": -0.5677561163902283, + "step": 1239 + }, + { + "epoch": 0.35, + "learning_rate": 1.0957543155842701e-06, + "loss": 0.4004, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -37.848445892333984, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -36.66267776489258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3848446011543274, + "rewards_train/margins": 2.2439231276512146, + "rewards_train/rejected": -2.628767728805542, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -56.31600570678711, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -78.78646850585938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.681600570678711, + "rewards_train/margins": -0.20295369625091553, + "rewards_train/rejected": -1.4786468744277954, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -95.71121215820312, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -148.9166717529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1211212873458862, + "rewards_train/margins": 2.57054603099823, + "rewards_train/rejected": -3.691667318344116, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -184.6442413330078, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -40.43207931518555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.364424228668213, + "rewards_train/margins": -0.6962162256240845, + "rewards_train/rejected": -1.6682080030441284, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.845579147338867, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -16.307979583740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0095579624176025, + "rewards_train/margins": 0.413427472114563, + "rewards_train/rejected": -1.4229854345321655, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -216.07220458984375, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -197.27493286132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.057220458984375, + "rewards_train/margins": -1.1297273635864258, + "rewards_train/rejected": -8.92749309539795, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -7.999689102172852, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -20.54460906982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3843439221382141, + "rewards_train/margins": 0.8201170563697815, + "rewards_train/rejected": -1.2044609785079956, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -139.2935791015625, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -312.392333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.929357886314392, + "rewards_train/margins": 14.709876656532288, + "rewards_train/rejected": -16.63923454284668, + "step": 1240 + }, + { + "epoch": 0.35, + "logps_train/chosen": -262.0230712890625, + "logps_train/ref_chosen": -222.0, + "logps_train/ref_rejected": -202.0, + "logps_train/rejected": -299.13348388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.002307415008545, + "rewards_train/margins": 5.71104097366333, + "rewards_train/rejected": -9.713348388671875, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -20.645721435546875, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -25.12656593322754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0708221197128296, + "rewards_train/margins": -0.24566549062728882, + "rewards_train/rejected": -0.8251566290855408, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -100.15695190429688, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -117.03878021240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8656951785087585, + "rewards_train/margins": 2.488182842731476, + "rewards_train/rejected": -3.3538780212402344, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -26.967975616455078, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -27.972957611083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1030476093292236, + "rewards_train/margins": 0.19424819946289062, + "rewards_train/rejected": -1.2972958087921143, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -29.75625991821289, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -17.278518676757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9256259799003601, + "rewards_train/margins": 0.30847591161727905, + "rewards_train/rejected": -1.2341018915176392, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -82.4605712890625, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -206.59967041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15394286811351776, + "rewards_train/margins": 6.913910195231438, + "rewards_train/rejected": -6.75996732711792, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -25.720579147338867, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -31.850095748901367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5845578908920288, + "rewards_train/margins": 0.3192017078399658, + "rewards_train/rejected": -1.9037595987319946, + "step": 1241 + }, + { + "epoch": 0.35, + "logps_train/chosen": -43.003196716308594, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -42.380332946777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4003196954727173, + "rewards_train/margins": 1.0502136945724487, + "rewards_train/rejected": -2.450533390045166, + "step": 1241 + }, + { + "epoch": 0.35, + "learning_rate": 1.0931205881725278e-06, + "loss": 0.4549, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -142.70016479492188, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -163.8304443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0200164318084717, + "rewards_train/margins": 1.1630280017852783, + "rewards_train/rejected": -4.18304443359375, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -259.51361083984375, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -253.02371215820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.251360893249512, + "rewards_train/margins": 1.2510108947753906, + "rewards_train/rejected": -12.502371788024902, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -6.548455238342285, + "logps_train/ref_chosen": -1.6015625, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -49.17089080810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49468928575515747, + "rewards_train/margins": 3.1348997950553894, + "rewards_train/rejected": -3.629589080810547, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -6.753727912902832, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -22.11848258972168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4066227972507477, + "rewards_train/margins": 0.8177255094051361, + "rewards_train/rejected": -1.2243483066558838, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -10.714043617248535, + "logps_train/ref_chosen": -1.25, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -27.21446418762207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9464043974876404, + "rewards_train/margins": 0.8812920451164246, + "rewards_train/rejected": -1.827696442604065, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.238704681396484, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -29.489776611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1894954442977905, + "rewards_train/margins": 1.1782323122024536, + "rewards_train/rejected": -2.367727756500244, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -36.260528564453125, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -55.616798400878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1885528564453125, + "rewards_train/margins": 0.723127007484436, + "rewards_train/rejected": -1.9116798639297485, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -190.5238494873047, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -195.27716064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3523850440979, + "rewards_train/margins": 2.4753313064575195, + "rewards_train/rejected": -6.82771635055542, + "step": 1242 + }, + { + "epoch": 0.35, + "logps_train/chosen": -77.40940856933594, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -94.18972778320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7409408688545227, + "rewards_train/margins": -0.5219680815935135, + "rewards_train/rejected": -0.21897278726100922, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -29.282609939575195, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -20.905847549438477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3595110177993774, + "rewards_train/margins": 0.21232378482818604, + "rewards_train/rejected": -1.5718348026275635, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -11.643255233764648, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -12.634445190429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6830755472183228, + "rewards_train/margins": 0.22724395990371704, + "rewards_train/rejected": -0.9103195071220398, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -19.59121322631836, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -19.190610885620117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7294338941574097, + "rewards_train/margins": -0.616622805595398, + "rewards_train/rejected": -1.1128110885620117, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -1.436923861503601, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -9.168750762939453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04525488615036011, + "rewards_train/margins": -0.27837981283664703, + "rewards_train/rejected": 0.23312492668628693, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -13.403881072998047, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -0.78515625, + "logps_train/rejected": -2.517616033554077, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6560131311416626, + "rewards_train/margins": -0.48276714980602264, + "rewards_train/rejected": -0.17324598133563995, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -180.6611328125, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -189.49285888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.966113328933716, + "rewards_train/margins": 3.283172845840454, + "rewards_train/rejected": -7.24928617477417, + "step": 1243 + }, + { + "epoch": 0.35, + "logps_train/chosen": -163.85040283203125, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -211.75436401367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.685040473937988, + "rewards_train/margins": 0.49039602279663086, + "rewards_train/rejected": -7.175436496734619, + "step": 1243 + }, + { + "epoch": 0.35, + "learning_rate": 1.0904862090153982e-06, + "loss": 0.4722, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -93.19380187988281, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -46.814083099365234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1443803310394287, + "rewards_train/margins": -2.8129720091819763, + "rewards_train/rejected": -0.3314083218574524, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -97.72345733642578, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -153.9591064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3223457336425781, + "rewards_train/margins": 5.173564910888672, + "rewards_train/rejected": -6.49591064453125, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -250.76168823242188, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -270.6501159667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.276168823242188, + "rewards_train/margins": 0.5888433456420898, + "rewards_train/rejected": -10.865012168884277, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -209.22760009765625, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -230.1201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.522759914398193, + "rewards_train/margins": 2.7892518043518066, + "rewards_train/rejected": -9.31201171875, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -12.40247917175293, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -18.90650749206543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32149791717529297, + "rewards_train/margins": 0.8722778558731079, + "rewards_train/rejected": -1.1937757730484009, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -22.2939395904541, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -41.836265563964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4981440305709839, + "rewards_train/margins": 1.4042326211929321, + "rewards_train/rejected": -2.902376651763916, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -117.86981964111328, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -172.60260009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.086982011795044, + "rewards_train/margins": 4.773277997970581, + "rewards_train/rejected": -7.860260009765625, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -29.81525230407715, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -34.52964401245117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7002753019332886, + "rewards_train/margins": 0.4214390516281128, + "rewards_train/rejected": -2.1217143535614014, + "step": 1244 + }, + { + "epoch": 0.35, + "logps_train/chosen": -108.26126098632812, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -124.87650299072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1761261224746704, + "rewards_train/margins": 1.7115241289138794, + "rewards_train/rejected": -2.88765025138855, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -87.06790924072266, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -101.14923095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2067909240722656, + "rewards_train/margins": 1.208132266998291, + "rewards_train/rejected": -2.4149231910705566, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -145.49444580078125, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -143.98025512695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7494447231292725, + "rewards_train/margins": -0.15141916275024414, + "rewards_train/rejected": -2.5980255603790283, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -6.181815147399902, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -10.529496192932129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27599403262138367, + "rewards_train/margins": 0.4160180985927582, + "rewards_train/rejected": -0.6920121312141418, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -205.83396911621094, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -207.80844116210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.983397006988525, + "rewards_train/margins": -0.0025529861450195312, + "rewards_train/rejected": -4.980844020843506, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -85.73654174804688, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -129.0952911376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8736541867256165, + "rewards_train/margins": 0.6858749985694885, + "rewards_train/rejected": -1.559529185295105, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.34320640563965, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -42.0938720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.415570616722107, + "rewards_train/margins": 1.2438167333602905, + "rewards_train/rejected": -2.6593873500823975, + "step": 1245 + }, + { + "epoch": 0.35, + "logps_train/chosen": -117.39531707763672, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -116.39571380615234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5895317792892456, + "rewards_train/margins": -0.0999603271484375, + "rewards_train/rejected": -1.489571452140808, + "step": 1245 + }, + { + "epoch": 0.35, + "learning_rate": 1.0878511965507434e-06, + "loss": 0.5166, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -6.094539642333984, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -10.046957015991211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.024921035394072533, + "rewards_train/margins": -0.23913326300680637, + "rewards_train/rejected": 0.2640542984008789, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.171485900878906, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -31.196430206298828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6858986020088196, + "rewards_train/margins": -0.06625556945800781, + "rewards_train/rejected": -0.6196430325508118, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -13.511175155639648, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -18.264699935913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9808050394058228, + "rewards_train/margins": 0.35816502571105957, + "rewards_train/rejected": -1.3389700651168823, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -188.5289306640625, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -203.9781951904297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.802893161773682, + "rewards_train/margins": -0.4050736427307129, + "rewards_train/rejected": -6.397819519042969, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -203.06805419921875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -291.96435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.756805419921875, + "rewards_train/margins": 6.739630699157715, + "rewards_train/rejected": -14.49643611907959, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -14.389849662780762, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -20.833980560302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8296099901199341, + "rewards_train/margins": 0.15378808975219727, + "rewards_train/rejected": -0.9833980798721313, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -121.65750885009766, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -133.6066131591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9657508730888367, + "rewards_train/margins": 3.9449103474617004, + "rewards_train/rejected": -4.910661220550537, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -22.846820831298828, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -0.77734375, + "logps_train/rejected": -17.50057029724121, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1620259284973145, + "rewards_train/margins": -0.48970329761505127, + "rewards_train/rejected": -1.6723226308822632, + "step": 1246 + }, + { + "epoch": 0.35, + "logps_train/chosen": -88.12803649902344, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -55.08420944213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4128036499023438, + "rewards_train/margins": 0.6706173419952393, + "rewards_train/rejected": -2.083420991897583, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -75.64297485351562, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -169.15359497070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9642975330352783, + "rewards_train/margins": 7.551062345504761, + "rewards_train/rejected": -9.515359878540039, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -104.47962188720703, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -173.73397827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3479622602462769, + "rewards_train/margins": 7.1754361391067505, + "rewards_train/rejected": -8.523398399353027, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -190.20664978027344, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -210.05758666992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.320664882659912, + "rewards_train/margins": 1.7850937843322754, + "rewards_train/rejected": -8.105758666992188, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -94.118408203125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -152.0758056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3381591737270355, + "rewards_train/margins": 2.0457397401332855, + "rewards_train/rejected": -1.70758056640625, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -16.121610641479492, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -9.651420593261719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9684110879898071, + "rewards_train/margins": -0.16811275482177734, + "rewards_train/rejected": -0.8002983331680298, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.154781341552734, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -22.838314056396484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37172815203666687, + "rewards_train/margins": 0.7746032774448395, + "rewards_train/rejected": -1.1463314294815063, + "step": 1247 + }, + { + "epoch": 0.35, + "logps_train/chosen": -85.39576721191406, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -201.506103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23957672715187073, + "rewards_train/margins": 7.761033624410629, + "rewards_train/rejected": -8.0006103515625, + "step": 1247 + }, + { + "epoch": 0.35, + "learning_rate": 1.0852155692208556e-06, + "loss": 0.4032, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -23.842849731445312, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -30.20937728881836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3092849850654602, + "rewards_train/margins": -0.1008472591638565, + "rewards_train/rejected": -0.2084377259016037, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -88.17086791992188, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -92.5969009399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41708680987358093, + "rewards_train/margins": 0.1426033079624176, + "rewards_train/rejected": -0.5596901178359985, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -194.535888671875, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -196.0752716064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.5535888671875, + "rewards_train/margins": -0.09606170654296875, + "rewards_train/rejected": -8.457527160644531, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -342.08404541015625, + "logps_train/ref_chosen": -300.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -90.31060791015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.208404541015625, + "rewards_train/margins": -1.6773436069488525, + "rewards_train/rejected": -2.5310609340667725, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -250.20968627929688, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -250.98837280273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.920969009399414, + "rewards_train/margins": -1.2221317291259766, + "rewards_train/rejected": -9.698837280273438, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -29.030994415283203, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -71.2582015991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9905994534492493, + "rewards_train/margins": 1.63522070646286, + "rewards_train/rejected": -2.6258201599121094, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -51.836669921875, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -98.38130950927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2586669921875, + "rewards_train/margins": 0.7794640064239502, + "rewards_train/rejected": -3.03813099861145, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -39.79238510131836, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -37.55560302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.929238498210907, + "rewards_train/margins": 1.7825718522071838, + "rewards_train/rejected": -2.711810350418091, + "step": 1248 + }, + { + "epoch": 0.35, + "logps_train/chosen": -28.971250534057617, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -73.32282257080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0471251010894775, + "rewards_train/margins": 2.935157299041748, + "rewards_train/rejected": -3.9822824001312256, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -24.74542236328125, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -17.38256072998047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.699542224407196, + "rewards_train/margins": -0.12378615140914917, + "rewards_train/rejected": -0.5757560729980469, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.7509925365448, + "logps_train/ref_chosen": -2.234375, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -11.0115966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15166175365447998, + "rewards_train/margins": 0.641685426235199, + "rewards_train/rejected": -0.793347179889679, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -20.366086959838867, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -88.2718505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5272337198257446, + "rewards_train/margins": 0.39995133876800537, + "rewards_train/rejected": -1.92718505859375, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -119.75568389892578, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -222.76036071777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2755683660507202, + "rewards_train/margins": 6.600467801094055, + "rewards_train/rejected": -7.876036167144775, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -5.887350082397461, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -2.140625, + "logps_train/rejected": -5.954710960388184, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18561001121997833, + "rewards_train/margins": 0.1957985907793045, + "rewards_train/rejected": -0.38140860199928284, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -156.7590789794922, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -185.74581909179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.475908041000366, + "rewards_train/margins": 4.39867377281189, + "rewards_train/rejected": -6.874581813812256, + "step": 1249 + }, + { + "epoch": 0.35, + "logps_train/chosen": -177.25494384765625, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -193.0, + "logps_train/rejected": -295.4144287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.025494337081909, + "rewards_train/margins": 7.2159483432769775, + "rewards_train/rejected": -10.241442680358887, + "step": 1249 + }, + { + "epoch": 0.35, + "learning_rate": 1.0825793454723324e-06, + "loss": 0.5322, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -153.2156524658203, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -164.01202392578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4715652465820312, + "rewards_train/margins": -0.07036280632019043, + "rewards_train/rejected": -3.401202440261841, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -120.77821350097656, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -242.36398315429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8778213858604431, + "rewards_train/margins": 6.558577120304108, + "rewards_train/rejected": -7.436398506164551, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -186.90960693359375, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -177.26519775390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.190960884094238, + "rewards_train/margins": -0.46444106101989746, + "rewards_train/rejected": -3.726519823074341, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -39.104705810546875, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -35.28081512451172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9354705810546875, + "rewards_train/margins": -1.1448890566825867, + "rewards_train/rejected": -0.7905815243721008, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -150.47618103027344, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -184.2717742919922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9976181983947754, + "rewards_train/margins": -0.670440673828125, + "rewards_train/rejected": -3.3271775245666504, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -4.4012579917907715, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -1.25, + "logps_train/rejected": -4.9052934646606445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14637580513954163, + "rewards_train/margins": 0.21915355324745178, + "rewards_train/rejected": -0.3655293583869934, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -124.85734558105469, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -132.95045471191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2857346534729004, + "rewards_train/margins": 1.6593108177185059, + "rewards_train/rejected": -4.945045471191406, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -114.951171875, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -167.86187744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.795117378234863, + "rewards_train/margins": 1.99107027053833, + "rewards_train/rejected": -7.786187648773193, + "step": 1250 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.474369049072266, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -24.281450271606445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48181191086769104, + "rewards_train/margins": 1.547895759344101, + "rewards_train/rejected": -2.029707670211792, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -112.05488586425781, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -119.42256164550781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.455488681793213, + "rewards_train/margins": -0.3632323741912842, + "rewards_train/rejected": -2.0922563076019287, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -120.57858276367188, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -154.73709106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8578583002090454, + "rewards_train/margins": 2.4658509492874146, + "rewards_train/rejected": -3.32370924949646, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -26.897174835205078, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -62.320030212402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8022174835205078, + "rewards_train/margins": 1.3047854900360107, + "rewards_train/rejected": -2.1070029735565186, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -19.034709930419922, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -20.900665283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6003459692001343, + "rewards_train/margins": 0.15378308296203613, + "rewards_train/rejected": -1.7541290521621704, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -170.53640747070312, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -222.07211303710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.6536407470703125, + "rewards_train/margins": 1.253570556640625, + "rewards_train/rejected": -6.9072113037109375, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -13.1867036819458, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -5.745269298553467, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9921078681945801, + "rewards_train/margins": -0.7988309413194656, + "rewards_train/rejected": -0.19327692687511444, + "step": 1251 + }, + { + "epoch": 0.35, + "logps_train/chosen": -169.77833557128906, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -111.0584716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.2278337478637695, + "rewards_train/margins": -1.9469866752624512, + "rewards_train/rejected": -5.280847072601318, + "step": 1251 + }, + { + "epoch": 0.35, + "learning_rate": 1.0799425437559448e-06, + "loss": 0.6628, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -113.96131896972656, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -150.3148193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4961318969726562, + "rewards_train/margins": 3.635350227355957, + "rewards_train/rejected": -6.131482124328613, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.068272590637207, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -43.975921630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7685460448265076, + "rewards_train/margins": 2.3352962136268616, + "rewards_train/rejected": -3.103842258453369, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -86.51798248291016, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -91.7109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2517982721328735, + "rewards_train/margins": 0.9192954301834106, + "rewards_train/rejected": -2.171093702316284, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -50.97898864746094, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -21.092256546020508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7728988528251648, + "rewards_train/margins": 0.7363268733024597, + "rewards_train/rejected": -1.5092257261276245, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -24.957130432128906, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -19.686195373535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3457130491733551, + "rewards_train/margins": 0.8885315358638763, + "rewards_train/rejected": -1.2342445850372314, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -8.579586029052734, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -70.11547088623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023291397839784622, + "rewards_train/margins": 5.8160884864628315, + "rewards_train/rejected": -5.792797088623047, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -131.11758422851562, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -133.66433715820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.561758518218994, + "rewards_train/margins": 0.10467529296875, + "rewards_train/rejected": -4.666433811187744, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -48.92180633544922, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -42.71375274658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.367180585861206, + "rewards_train/margins": 1.066694736480713, + "rewards_train/rejected": -3.433875322341919, + "step": 1252 + }, + { + "epoch": 0.35, + "logps_train/chosen": -14.594162940979004, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -37.208473205566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5469163060188293, + "rewards_train/margins": 0.8489310145378113, + "rewards_train/rejected": -1.3958473205566406, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.360458374023438, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -44.670711517333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8672958612442017, + "rewards_train/margins": 1.4372752904891968, + "rewards_train/rejected": -2.3045711517333984, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -19.925495147705078, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -100.49283599853516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4081745147705078, + "rewards_train/margins": -0.5088909268379211, + "rewards_train/rejected": -0.8992835879325867, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -84.43020629882812, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -104.80482482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.493020623922348, + "rewards_train/margins": 0.3374618589878082, + "rewards_train/rejected": -0.8304824829101562, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -161.2735595703125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -142.40774536132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.927356004714966, + "rewards_train/margins": 1.0134186744689941, + "rewards_train/rejected": -3.94077467918396, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -17.81024932861328, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -34.91242218017578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1185249090194702, + "rewards_train/margins": -0.8022826910018921, + "rewards_train/rejected": -0.3162422180175781, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -47.41815948486328, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -37.89848327636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5418159365653992, + "rewards_train/margins": 1.3855324387550354, + "rewards_train/rejected": -1.9273483753204346, + "step": 1253 + }, + { + "epoch": 0.35, + "logps_train/chosen": -1.2324891090393066, + "logps_train/ref_chosen": -0.56640625, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -6.104624271392822, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06660828739404678, + "rewards_train/margins": 0.23135413974523544, + "rewards_train/rejected": -0.2979624271392822, + "step": 1253 + }, + { + "epoch": 0.35, + "learning_rate": 1.0773051825265095e-06, + "loss": 0.4062, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -127.78241729736328, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -124.80416870117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.178241729736328, + "rewards_train/margins": -0.6978248357772827, + "rewards_train/rejected": -1.4804168939590454, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -8.258561134338379, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -1.28125, + "logps_train/rejected": -10.648876190185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4617936313152313, + "rewards_train/margins": 0.4749689996242523, + "rewards_train/rejected": -0.9367626309394836, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -15.258854866027832, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -29.909202575683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26963549852371216, + "rewards_train/margins": 0.20878475904464722, + "rewards_train/rejected": -0.4784202575683594, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -48.49344253540039, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -34.146728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.574344277381897, + "rewards_train/margins": 0.8028285503387451, + "rewards_train/rejected": -1.377172827720642, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.8185293674468994, + "logps_train/ref_chosen": -0.92578125, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -6.1845784187316895, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.28927481174468994, + "rewards_train/margins": -0.12394197285175323, + "rewards_train/rejected": -0.1653328388929367, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -34.19022750854492, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -106.39311218261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4065227508544922, + "rewards_train/margins": 1.6327884197235107, + "rewards_train/rejected": -2.039311170578003, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -179.52255249023438, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -213.8988037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.652255535125732, + "rewards_train/margins": 5.887625217437744, + "rewards_train/rejected": -11.539880752563477, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -16.726486206054688, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -40.179779052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4117110967636108, + "rewards_train/margins": 0.8937667608261108, + "rewards_train/rejected": -2.3054778575897217, + "step": 1254 + }, + { + "epoch": 0.35, + "logps_train/chosen": -46.321327209472656, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -133.98388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4321327209472656, + "rewards_train/margins": 3.1162562370300293, + "rewards_train/rejected": -4.548388957977295, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -293.40234375, + "logps_train/ref_chosen": -201.0, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -41.7081184387207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.240234375, + "rewards_train/margins": -7.481922507286072, + "rewards_train/rejected": -1.7583118677139282, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -8.26048755645752, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -15.732531547546387, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07292375713586807, + "rewards_train/margins": 0.7097044214606285, + "rewards_train/rejected": -0.7826281785964966, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -89.51554107666016, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -114.20747375488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7015541791915894, + "rewards_train/margins": 1.7191933393478394, + "rewards_train/rejected": -3.4207475185394287, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -201.0158233642578, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -199.10287475585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.101582527160645, + "rewards_train/margins": 0.05870532989501953, + "rewards_train/rejected": -11.160287857055664, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -135.9039764404297, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -108.67892456054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0403976440429688, + "rewards_train/margins": -1.0225050449371338, + "rewards_train/rejected": -2.017892599105835, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.5119621753692627, + "logps_train/ref_chosen": -1.0078125, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -23.219806671142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25041496753692627, + "rewards_train/margins": 0.4903157353401184, + "rewards_train/rejected": -0.7407307028770447, + "step": 1255 + }, + { + "epoch": 0.35, + "logps_train/chosen": -108.35395050048828, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -260.206298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1853950023651123, + "rewards_train/margins": 11.785234689712524, + "rewards_train/rejected": -13.970629692077637, + "step": 1255 + }, + { + "epoch": 0.35, + "learning_rate": 1.0746672802427583e-06, + "loss": 0.8989, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -24.763256072998047, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -38.46894454956055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15132561326026917, + "rewards_train/margins": 1.7455688416957855, + "rewards_train/rejected": -1.8968944549560547, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -129.30091857910156, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -58.59198760986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.880091905593872, + "rewards_train/margins": 0.14160680770874023, + "rewards_train/rejected": -3.0216987133026123, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -129.2373046875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -230.05804443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7737305164337158, + "rewards_train/margins": 8.482074499130249, + "rewards_train/rejected": -10.255805015563965, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -22.330333709716797, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -20.9239559173584, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5642833709716797, + "rewards_train/margins": -0.2375127077102661, + "rewards_train/rejected": -1.3267706632614136, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -10.899224281311035, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -18.501445770263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07117243111133575, + "rewards_train/margins": 0.7414721697568893, + "rewards_train/rejected": -0.8126446008682251, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -101.65040588378906, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -155.6121826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7849594354629517, + "rewards_train/margins": 6.296177983283997, + "rewards_train/rejected": -5.511218547821045, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -262.9591064453125, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -275.8601379394531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.89591121673584, + "rewards_train/margins": -0.20989704132080078, + "rewards_train/rejected": -13.686014175415039, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -8.110187530517578, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -20.64010238647461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6461750268936157, + "rewards_train/margins": -0.13216477632522583, + "rewards_train/rejected": -0.5140102505683899, + "step": 1256 + }, + { + "epoch": 0.35, + "logps_train/chosen": -88.93323516845703, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -139.06332397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4066765010356903, + "rewards_train/margins": 0.7130089104175568, + "rewards_train/rejected": -0.30633240938186646, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -30.567262649536133, + "logps_train/ref_chosen": -3.140625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -30.452754974365234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.742663860321045, + "rewards_train/margins": -0.011450767517089844, + "rewards_train/rejected": -2.731213092803955, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -33.535709381103516, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -21.72088050842285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6660709381103516, + "rewards_train/margins": 0.0653921365737915, + "rewards_train/rejected": -1.731463074684143, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -20.310497283935547, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -19.930133819580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3935497999191284, + "rewards_train/margins": 0.015088558197021484, + "rewards_train/rejected": -1.40863835811615, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -170.6939697265625, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -212.0790557861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.06939697265625, + "rewards_train/margins": 5.1385087966918945, + "rewards_train/rejected": -8.207905769348145, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -76.47227478027344, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -76.38789367675781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6472274661064148, + "rewards_train/margins": -0.0084381103515625, + "rewards_train/rejected": -0.6387893557548523, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -4.847728729248047, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -11.865056991577148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2847728729248047, + "rewards_train/margins": 0.20485782623291016, + "rewards_train/rejected": -0.48963069915771484, + "step": 1257 + }, + { + "epoch": 0.35, + "logps_train/chosen": -91.2269058227539, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -191.7371063232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2726906538009644, + "rewards_train/margins": 4.901020169258118, + "rewards_train/rejected": -6.173710823059082, + "step": 1257 + }, + { + "epoch": 0.35, + "learning_rate": 1.072028855367211e-06, + "loss": 0.4574, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -61.776878356933594, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -68.10521697998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0776878595352173, + "rewards_train/margins": 0.6078338623046875, + "rewards_train/rejected": -1.6855217218399048, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -171.68124389648438, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -170.48388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8681243658065796, + "rewards_train/margins": 3.130264401435852, + "rewards_train/rejected": -4.998388767242432, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -155.64920043945312, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -228.29806518554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.414920330047607, + "rewards_train/margins": 6.164885997772217, + "rewards_train/rejected": -10.579806327819824, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -15.18215560913086, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -19.297332763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8557155728340149, + "rewards_train/margins": 0.6052677035331726, + "rewards_train/rejected": -1.4609832763671875, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -94.97760772705078, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -92.54940795898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0477608442306519, + "rewards_train/margins": 0.5571799278259277, + "rewards_train/rejected": -1.6049407720565796, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -143.2901611328125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -122.42826080322266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.529016137123108, + "rewards_train/margins": -0.43619000911712646, + "rewards_train/rejected": -1.0928261280059814, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -38.983463287353516, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -30.914825439453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4545962810516357, + "rewards_train/margins": -0.06936359405517578, + "rewards_train/rejected": -2.38523268699646, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -91.1876449584961, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -60.65077590942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.093764781951904, + "rewards_train/margins": 0.008812904357910156, + "rewards_train/rejected": -4.1025776863098145, + "step": 1258 + }, + { + "epoch": 0.35, + "logps_train/chosen": -25.45158576965332, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -37.087669372558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9639086127281189, + "rewards_train/margins": 1.3011084198951721, + "rewards_train/rejected": -2.265017032623291, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -150.8518524169922, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -199.00111389160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.135185241699219, + "rewards_train/margins": 3.5649261474609375, + "rewards_train/rejected": -7.700111389160156, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -48.865657806396484, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -54.091854095458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4615658521652222, + "rewards_train/margins": 1.3726195096969604, + "rewards_train/rejected": -2.8341853618621826, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -144.40078735351562, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -114.09404754638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.290078639984131, + "rewards_train/margins": 0.06932640075683594, + "rewards_train/rejected": -4.359405040740967, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -223.145263671875, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -191.83419799804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.414526462554932, + "rewards_train/margins": -0.1811065673828125, + "rewards_train/rejected": -7.233419895172119, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.28667950630188, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -97.04346466064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39008206129074097, + "rewards_train/margins": 2.8444284796714783, + "rewards_train/rejected": -2.4543464183807373, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -85.69231414794922, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -88.18721008300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31923142075538635, + "rewards_train/margins": 0.44948962330818176, + "rewards_train/rejected": -0.7687210440635681, + "step": 1259 + }, + { + "epoch": 0.35, + "logps_train/chosen": -139.6483154296875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -196.18417358398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.414831519126892, + "rewards_train/margins": 3.4035860300064087, + "rewards_train/rejected": -4.818417549133301, + "step": 1259 + }, + { + "epoch": 0.35, + "learning_rate": 1.069389926366044e-06, + "loss": 0.3903, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -20.62066650390625, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -31.56306266784668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07456665486097336, + "rewards_train/margins": 2.2192396596074104, + "rewards_train/rejected": -2.293806314468384, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -126.87420654296875, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -126.63377380371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.837420642375946, + "rewards_train/margins": -0.024043262004852295, + "rewards_train/rejected": -0.8133773803710938, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -102.21829986572266, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -137.753662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6218299865722656, + "rewards_train/margins": 1.3035361766815186, + "rewards_train/rejected": -2.925366163253784, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -124.3290023803711, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -181.6815185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5329002141952515, + "rewards_train/margins": 3.985251545906067, + "rewards_train/rejected": -5.518151760101318, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -48.01361846923828, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -59.875572204589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3013618588447571, + "rewards_train/margins": 4.129945456981659, + "rewards_train/rejected": -4.431307315826416, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.328630447387695, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -23.609214782714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.36723804473876953, + "rewards_train/margins": -0.0563165545463562, + "rewards_train/rejected": -0.31092149019241333, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -2.010131359100342, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -8.390519142150879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15367436408996582, + "rewards_train/margins": 0.41460129618644714, + "rewards_train/rejected": -0.2609269320964813, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -22.320253372192383, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -30.700685501098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9632753729820251, + "rewards_train/margins": 0.34429317712783813, + "rewards_train/rejected": -1.3075685501098633, + "step": 1260 + }, + { + "epoch": 0.35, + "logps_train/chosen": -64.18463134765625, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -27.835988998413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4184631407260895, + "rewards_train/margins": 1.3651357591152191, + "rewards_train/rejected": -1.7835988998413086, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -23.26091957092285, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -44.3067512512207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5135919451713562, + "rewards_train/margins": 1.6295831799507141, + "rewards_train/rejected": -2.1431751251220703, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -142.77142333984375, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -232.55718994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.277142524719238, + "rewards_train/margins": 7.078577041625977, + "rewards_train/rejected": -11.355719566345215, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.3265304565429688, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -15.387053489685059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007653045933693647, + "rewards_train/margins": 1.2498023030348122, + "rewards_train/rejected": -1.2574553489685059, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -77.98744201660156, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -171.61126708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7737443447113037, + "rewards_train/margins": 6.137382745742798, + "rewards_train/rejected": -8.911127090454102, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -2.2451844215393066, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -12.894331932067871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025481557473540306, + "rewards_train/margins": 0.25241475366055965, + "rewards_train/rejected": -0.22693319618701935, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -55.192893981933594, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -231.62527465820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.669289469718933, + "rewards_train/margins": 7.893238186836243, + "rewards_train/rejected": -9.562527656555176, + "step": 1261 + }, + { + "epoch": 0.35, + "logps_train/chosen": -112.40425109863281, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -149.10975646972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.690425157546997, + "rewards_train/margins": 2.520550489425659, + "rewards_train/rejected": -5.210975646972656, + "step": 1261 + }, + { + "epoch": 0.35, + "learning_rate": 1.0667505117089626e-06, + "loss": 0.2602, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.882377624511719, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -17.854825973510742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7023002505302429, + "rewards_train/margins": 0.6300573945045471, + "rewards_train/rejected": -1.33235764503479, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -106.04197692871094, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -158.99725341796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7041977643966675, + "rewards_train/margins": -0.20447242259979248, + "rewards_train/rejected": -1.499725341796875, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -45.18576431274414, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -49.66484832763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8310763835906982, + "rewards_train/margins": 0.2479085922241211, + "rewards_train/rejected": -3.0789849758148193, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -88.26063537597656, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -166.41268920898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5260635614395142, + "rewards_train/margins": 5.765205264091492, + "rewards_train/rejected": -7.291268825531006, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -62.11125946044922, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -50.526222229003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.4486260414123535, + "rewards_train/margins": -0.5335037708282471, + "rewards_train/rejected": -3.9151222705841064, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.790776491165161, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -46.38349914550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2454839050769806, + "rewards_train/margins": 3.2053661048412323, + "rewards_train/rejected": -3.450850009918213, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -128.15792846679688, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -223.9988250732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6157928705215454, + "rewards_train/margins": 9.234090209007263, + "rewards_train/rejected": -9.849883079528809, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -77.59796905517578, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -165.5731658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7097969055175781, + "rewards_train/margins": 3.4975199699401855, + "rewards_train/rejected": -5.207316875457764, + "step": 1262 + }, + { + "epoch": 0.35, + "logps_train/chosen": -34.75870132446289, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -47.74822235107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.763370156288147, + "rewards_train/margins": 0.8864520788192749, + "rewards_train/rejected": -2.649822235107422, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -225.7360382080078, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -228.2771453857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.47360372543335, + "rewards_train/margins": 0.25411081314086914, + "rewards_train/rejected": -6.727714538574219, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -125.08016967773438, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -183.65646362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1080169677734375, + "rewards_train/margins": 4.357629299163818, + "rewards_train/rejected": -7.465646266937256, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.171890258789062, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -15.048356056213379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06718903034925461, + "rewards_train/margins": 0.9595215991139412, + "rewards_train/rejected": -1.0267106294631958, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -19.135398864746094, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -29.22586441040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9760398864746094, + "rewards_train/margins": 0.9840465784072876, + "rewards_train/rejected": -1.960086464881897, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -126.14297485351562, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -168.9985809326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0642974376678467, + "rewards_train/margins": 2.5355608463287354, + "rewards_train/rejected": -4.599858283996582, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -234.63096618652344, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -230.99899291992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.763096809387207, + "rewards_train/margins": -0.2631974220275879, + "rewards_train/rejected": -7.499899387359619, + "step": 1263 + }, + { + "epoch": 0.35, + "logps_train/chosen": -29.36359405517578, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -18.079933166503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1738594025373459, + "rewards_train/margins": 1.2450714856386185, + "rewards_train/rejected": -1.4189308881759644, + "step": 1263 + }, + { + "epoch": 0.35, + "learning_rate": 1.0641106298690704e-06, + "loss": 0.3505, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -136.02972412109375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -236.17605590820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.952972412109375, + "rewards_train/margins": 5.064633369445801, + "rewards_train/rejected": -7.017605781555176, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -57.54686737060547, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -67.39877319335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0046867369674146175, + "rewards_train/margins": 0.41019058832898736, + "rewards_train/rejected": -0.414877325296402, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -131.05067443847656, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -159.1729278564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9050674438476562, + "rewards_train/margins": 2.2122254371643066, + "rewards_train/rejected": -4.117292881011963, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -26.55202865600586, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -31.158100128173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5802028775215149, + "rewards_train/margins": 2.0512322783470154, + "rewards_train/rejected": -2.6314351558685303, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -144.57528686523438, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -198.39833068847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6575286388397217, + "rewards_train/margins": 1.7823045253753662, + "rewards_train/rejected": -5.439833164215088, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -7.215012550354004, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -37.351043701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09650125354528427, + "rewards_train/margins": 2.0448530688881874, + "rewards_train/rejected": -2.1413543224334717, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -188.6663055419922, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -230.90533447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3666305541992188, + "rewards_train/margins": 3.8239030838012695, + "rewards_train/rejected": -7.190533638000488, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -38.09563446044922, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -39.37556457519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0345635414123535, + "rewards_train/margins": 0.3842430114746094, + "rewards_train/rejected": -3.418806552886963, + "step": 1264 + }, + { + "epoch": 0.35, + "logps_train/chosen": -206.42410278320312, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -176.89801025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.842410564422607, + "rewards_train/margins": 2.097390651702881, + "rewards_train/rejected": -6.939801216125488, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -49.86615753173828, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -77.17277526855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2991158962249756, + "rewards_train/margins": 1.118161678314209, + "rewards_train/rejected": -3.4172775745391846, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -29.584423065185547, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -26.380617141723633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6834422945976257, + "rewards_train/margins": 0.2421194314956665, + "rewards_train/rejected": -0.9255617260932922, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -32.92138671875, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -17.87721061706543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.867138683795929, + "rewards_train/margins": 0.5252699255943298, + "rewards_train/rejected": -1.3924086093902588, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -172.62710571289062, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -208.30508422851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.612710475921631, + "rewards_train/margins": 7.317798137664795, + "rewards_train/rejected": -11.930508613586426, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -33.36079788208008, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -51.94220733642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.342329740524292, + "rewards_train/margins": 1.2143909931182861, + "rewards_train/rejected": -3.556720733642578, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -199.43438720703125, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -215.1173095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.243438720703125, + "rewards_train/margins": 2.0682921409606934, + "rewards_train/rejected": -6.311730861663818, + "step": 1265 + }, + { + "epoch": 0.35, + "logps_train/chosen": -125.22705078125, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -165.8796844482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.672705054283142, + "rewards_train/margins": 2.665263295173645, + "rewards_train/rejected": -4.337968349456787, + "step": 1265 + }, + { + "epoch": 0.35, + "learning_rate": 1.0614702993227423e-06, + "loss": 0.2156, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -142.97015380859375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -163.88650512695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6470154523849487, + "rewards_train/margins": 3.1416350603103638, + "rewards_train/rejected": -4.7886505126953125, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -12.675359725952148, + "logps_train/ref_chosen": -3.140625, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -16.839595794677734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9534735083580017, + "rewards_train/margins": -0.30701392889022827, + "rewards_train/rejected": -0.6464595794677734, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.5360958576202393, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -4.50766134262085, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14110958576202393, + "rewards_train/margins": 0.05496905744075775, + "rewards_train/rejected": -0.19607864320278168, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -71.55955505371094, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -80.3512191772461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.505955457687378, + "rewards_train/margins": 2.9791667461395264, + "rewards_train/rejected": -5.485122203826904, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -157.52676391601562, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -280.6134338378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.752676486968994, + "rewards_train/margins": 9.508667469024658, + "rewards_train/rejected": -13.261343955993652, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -74.53352355957031, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -124.96279907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05335235595703125, + "rewards_train/margins": 2.642927646636963, + "rewards_train/rejected": -2.696280002593994, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -9.743194580078125, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -20.279212951660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5758819580078125, + "rewards_train/margins": 0.7582893371582031, + "rewards_train/rejected": -1.3341712951660156, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -131.21005249023438, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -158.2813720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8710052967071533, + "rewards_train/margins": 0.7571320533752441, + "rewards_train/rejected": -2.6281373500823975, + "step": 1266 + }, + { + "epoch": 0.35, + "logps_train/chosen": -290.47100830078125, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -217.80577087402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.447100639343262, + "rewards_train/margins": -3.7665233612060547, + "rewards_train/rejected": -8.680577278137207, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -2.0037283897399902, + "logps_train/ref_chosen": -1.234375, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -7.953601360321045, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07693534344434738, + "rewards_train/margins": 0.49967480450868607, + "rewards_train/rejected": -0.5766101479530334, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -94.69987487792969, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -152.1360626220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1699875593185425, + "rewards_train/margins": 2.7936187982559204, + "rewards_train/rejected": -3.963606357574463, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -174.48452758789062, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -187.39022827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1484527587890625, + "rewards_train/margins": 3.8905701637268066, + "rewards_train/rejected": -6.039022922515869, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -3.915961503982544, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -20.45549774169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23534615337848663, + "rewards_train/margins": 0.9977035969495773, + "rewards_train/rejected": -1.233049750328064, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -115.95576477050781, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -235.80038452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3955764770507812, + "rewards_train/margins": 4.0844621658325195, + "rewards_train/rejected": -7.480038642883301, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -47.07567596435547, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -33.05510711669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0325676202774048, + "rewards_train/margins": 1.166693091392517, + "rewards_train/rejected": -2.199260711669922, + "step": 1267 + }, + { + "epoch": 0.35, + "logps_train/chosen": -107.111328125, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -104.46651458740234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3111329078674316, + "rewards_train/margins": -0.2644813060760498, + "rewards_train/rejected": -2.046651601791382, + "step": 1267 + }, + { + "epoch": 0.35, + "learning_rate": 1.058829538549492e-06, + "loss": 0.5145, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -248.65829467773438, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -192.7864532470703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.265829563140869, + "rewards_train/margins": -2.6871843338012695, + "rewards_train/rejected": -4.5786452293396, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -5.327823162078857, + "logps_train/ref_chosen": -1.796875, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -48.258888244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35309481620788574, + "rewards_train/margins": 2.1102941036224365, + "rewards_train/rejected": -2.4633889198303223, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -13.210578918457031, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -21.750173568725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4898079037666321, + "rewards_train/margins": 1.1133344769477844, + "rewards_train/rejected": -1.6031423807144165, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -70.3731689453125, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -239.0, + "logps_train/rejected": -285.41900634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.237316846847534, + "rewards_train/margins": 2.404583692550659, + "rewards_train/rejected": -4.641900539398193, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -77.2886962890625, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -110.61178588867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27886962890625, + "rewards_train/margins": 0.2323089838027954, + "rewards_train/rejected": -0.5111786127090454, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -60.98087692260742, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -109.36689758300781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6480876803398132, + "rewards_train/margins": -0.7613979205489159, + "rewards_train/rejected": 0.11331024020910263, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -192.87705993652344, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -223.97186279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.787705898284912, + "rewards_train/margins": 1.4094805717468262, + "rewards_train/rejected": -6.197186470031738, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -219.16513061523438, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -219.62359619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.416512966156006, + "rewards_train/margins": 0.34584665298461914, + "rewards_train/rejected": -6.762359619140625, + "step": 1268 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.93155288696289, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -49.767906188964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5181552767753601, + "rewards_train/margins": 2.846135437488556, + "rewards_train/rejected": -3.364290714263916, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -175.76065063476562, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -48.24125671386719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.026065349578857, + "rewards_train/margins": -1.9019396305084229, + "rewards_train/rejected": -3.1241257190704346, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -93.83943176269531, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -32.471405029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.983943223953247, + "rewards_train/margins": 0.1506972312927246, + "rewards_train/rejected": -2.1346404552459717, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.429065704345703, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -16.0894775390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2491565942764282, + "rewards_train/margins": -0.1120837926864624, + "rewards_train/rejected": -1.1370728015899658, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -171.13699340820312, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -225.31362915039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.363699436187744, + "rewards_train/margins": 1.5676636695861816, + "rewards_train/rejected": -5.931363105773926, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -79.52962493896484, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -56.29131317138672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.202962636947632, + "rewards_train/margins": -1.1238312721252441, + "rewards_train/rejected": -2.0791313648223877, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -77.89945220947266, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -120.91805267333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3399452269077301, + "rewards_train/margins": 0.3018600642681122, + "rewards_train/rejected": -0.6418052911758423, + "step": 1269 + }, + { + "epoch": 0.35, + "logps_train/chosen": -23.153409957885742, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -33.71623611450195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9403409957885742, + "rewards_train/margins": -0.03121739625930786, + "rewards_train/rejected": -0.9091235995292664, + "step": 1269 + }, + { + "epoch": 0.35, + "learning_rate": 1.0561883660318454e-06, + "loss": 0.7525, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -18.989959716796875, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -29.901758193969727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5130585432052612, + "rewards_train/margins": -0.7103826999664307, + "rewards_train/rejected": -0.8026758432388306, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -106.73441314697266, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -173.12916564941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1234413385391235, + "rewards_train/margins": 6.489475131034851, + "rewards_train/rejected": -7.612916469573975, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -122.16688537597656, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -133.81288146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1666886806488037, + "rewards_train/margins": 2.964599370956421, + "rewards_train/rejected": -5.131288051605225, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -148.135009765625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -176.53073120117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9135010242462158, + "rewards_train/margins": 2.6395723819732666, + "rewards_train/rejected": -4.553073406219482, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -37.35701370239258, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -39.95118713378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5607013702392578, + "rewards_train/margins": 1.2281672954559326, + "rewards_train/rejected": -2.7888686656951904, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -19.84867286682129, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -57.641563415527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9473673105239868, + "rewards_train/margins": 1.2417889833450317, + "rewards_train/rejected": -2.1891562938690186, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -116.6451416015625, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -175.8542022705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4145143032073975, + "rewards_train/margins": 2.9709060192108154, + "rewards_train/rejected": -6.385420322418213, + "step": 1270 + }, + { + "epoch": 0.35, + "logps_train/chosen": -208.396728515625, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -199.61959838867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1396729946136475, + "rewards_train/margins": 1.4222869873046875, + "rewards_train/rejected": -3.561959981918335, + "step": 1270 + }, + { + "epoch": 0.36, + "logps_train/chosen": -171.8897705078125, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -191.04795837402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1889771223068237, + "rewards_train/margins": 4.415818810462952, + "rewards_train/rejected": -5.604795932769775, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -13.459746360778809, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -12.417173385620117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13972464203834534, + "rewards_train/margins": -0.3667573034763336, + "rewards_train/rejected": 0.22703266143798828, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -239.17681884765625, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -224.0, + "logps_train/rejected": -289.71954345703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.217681884765625, + "rewards_train/margins": -0.6457276344299316, + "rewards_train/rejected": -6.571954250335693, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -0.10121802985668182, + "logps_train/ref_chosen": -0.011962890625, + "logps_train/ref_rejected": -0.011962890625, + "logps_train/rejected": -0.10143516957759857, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008925514295697212, + "rewards_train/margins": 2.171378582715988e-05, + "rewards_train/rejected": -0.008947228081524372, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -44.23063278198242, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -60.80674743652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0230634212493896, + "rewards_train/margins": 1.5701112747192383, + "rewards_train/rejected": -3.593174695968628, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -87.43147277832031, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -26.764453887939453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9931472539901733, + "rewards_train/margins": -0.7542018890380859, + "rewards_train/rejected": -1.2389453649520874, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -109.18968200683594, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -205.68447875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9689682722091675, + "rewards_train/margins": 5.099479794502258, + "rewards_train/rejected": -7.068448066711426, + "step": 1271 + }, + { + "epoch": 0.36, + "logps_train/chosen": -9.252731323242188, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -17.342571258544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.206523135304451, + "rewards_train/margins": 0.32773397862911224, + "rewards_train/rejected": -0.5342571139335632, + "step": 1271 + }, + { + "epoch": 0.36, + "learning_rate": 1.0535468002552099e-06, + "loss": 0.4099, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -87.42636108398438, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -162.3565216064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14263610541820526, + "rewards_train/margins": 4.443016245961189, + "rewards_train/rejected": -4.5856523513793945, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -135.9366912841797, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -143.49090576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8436691761016846, + "rewards_train/margins": 1.355421543121338, + "rewards_train/rejected": -3.1990907192230225, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -11.636436462402344, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -19.918560028076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17614364624023438, + "rewards_train/margins": 0.6657123565673828, + "rewards_train/rejected": -0.8418560028076172, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -41.62705993652344, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -42.133323669433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.175205945968628, + "rewards_train/margins": -0.14937353134155273, + "rewards_train/rejected": -2.025832414627075, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -121.85820007324219, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -179.3809814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 1.414180040359497, + "rewards_train/margins": 4.152278184890747, + "rewards_train/rejected": -2.73809814453125, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -156.63900756835938, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -167.13348388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0139007568359375, + "rewards_train/margins": 1.3494477272033691, + "rewards_train/rejected": -5.363348484039307, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -120.47578430175781, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -197.77969360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.997578501701355, + "rewards_train/margins": 5.380390763282776, + "rewards_train/rejected": -7.377969264984131, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -170.39501953125, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -146.56675720214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.689502239227295, + "rewards_train/margins": -0.23282623291015625, + "rewards_train/rejected": -5.456676006317139, + "step": 1272 + }, + { + "epoch": 0.36, + "logps_train/chosen": -19.854106903076172, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -1.0703125, + "logps_train/rejected": -12.61127758026123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4854106903076172, + "rewards_train/margins": -0.33131420612335205, + "rewards_train/rejected": -1.1540964841842651, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -262.448974609375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -251.02880859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -14.594897270202637, + "rewards_train/margins": -0.5420160293579102, + "rewards_train/rejected": -14.052881240844727, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -164.9198455810547, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -207.81198120117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9919846057891846, + "rewards_train/margins": 2.0892136096954346, + "rewards_train/rejected": -5.081198215484619, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -50.850120544433594, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -40.20323181152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8287620544433594, + "rewards_train/margins": -1.0146887302398682, + "rewards_train/rejected": -2.814073324203491, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -23.69060516357422, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -33.82771682739258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.48156052827835083, + "rewards_train/margins": -0.2862888425588608, + "rewards_train/rejected": -0.19527168571949005, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -20.541126251220703, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -38.58365249633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3384876251220703, + "rewards_train/margins": 1.3573777675628662, + "rewards_train/rejected": -2.6958653926849365, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -21.070829391479492, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -25.853525161743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9633329510688782, + "rewards_train/margins": 0.9063945412635803, + "rewards_train/rejected": -1.8697274923324585, + "step": 1273 + }, + { + "epoch": 0.36, + "logps_train/chosen": -151.98301696777344, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -228.80484008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1983017921447754, + "rewards_train/margins": 2.6821823120117188, + "rewards_train/rejected": -4.880484104156494, + "step": 1273 + }, + { + "epoch": 0.36, + "learning_rate": 1.0509048597077449e-06, + "loss": 0.4555, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -139.44386291503906, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -99.44976043701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.69438636302948, + "rewards_train/margins": 2.500589966773987, + "rewards_train/rejected": -4.194976329803467, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -104.75081634521484, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -151.4956512451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024918366223573685, + "rewards_train/margins": 3.12448363378644, + "rewards_train/rejected": -3.099565267562866, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -220.21554565429688, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -181.98870849609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9215545654296875, + "rewards_train/margins": -1.322683572769165, + "rewards_train/rejected": -2.5988709926605225, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -124.85013580322266, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -174.75112915039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5850136280059814, + "rewards_train/margins": 3.090099573135376, + "rewards_train/rejected": -5.675113201141357, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -24.311756134033203, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -23.231420516967773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.581175684928894, + "rewards_train/margins": 0.07321643829345703, + "rewards_train/rejected": -1.654392123222351, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -187.53076171875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -228.75474548339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.453076362609863, + "rewards_train/margins": 1.7223987579345703, + "rewards_train/rejected": -9.175475120544434, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -6.283733367919922, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -27.39197540283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1064983382821083, + "rewards_train/margins": 0.6951992139220238, + "rewards_train/rejected": -0.8016975522041321, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -131.3850555419922, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -131.31326293945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5885055661201477, + "rewards_train/margins": -0.00717926025390625, + "rewards_train/rejected": -0.5813263058662415, + "step": 1274 + }, + { + "epoch": 0.36, + "logps_train/chosen": -252.0100555419922, + "logps_train/ref_chosen": -207.0, + "logps_train/ref_rejected": -200.0, + "logps_train/rejected": -228.28500366210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.50100564956665, + "rewards_train/margins": -1.6725051403045654, + "rewards_train/rejected": -2.828500509262085, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -114.46892547607422, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -126.09696960449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.796892523765564, + "rewards_train/margins": 1.3128045797348022, + "rewards_train/rejected": -3.109697103500366, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -87.07172393798828, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -44.37599182128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5071724653244019, + "rewards_train/margins": 1.0429266691207886, + "rewards_train/rejected": -2.5500991344451904, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -15.600200653076172, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -23.929405212402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8506450653076172, + "rewards_train/margins": 0.2047954797744751, + "rewards_train/rejected": -1.0554405450820923, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -81.1876220703125, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -141.53228759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.531237781047821, + "rewards_train/margins": 1.084466576576233, + "rewards_train/rejected": -0.5532287955284119, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -131.00364685058594, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -59.0867919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.200364589691162, + "rewards_train/margins": 0.43331480026245117, + "rewards_train/rejected": -4.633679389953613, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -23.999370574951172, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -34.519805908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2327497005462646, + "rewards_train/margins": 0.012980937957763672, + "rewards_train/rejected": -2.2457306385040283, + "step": 1275 + }, + { + "epoch": 0.36, + "logps_train/chosen": -97.69183349609375, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -118.71031951904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01918335072696209, + "rewards_train/margins": 2.201848553493619, + "rewards_train/rejected": -2.221031904220581, + "step": 1275 + }, + { + "epoch": 0.36, + "learning_rate": 1.0482625628802338e-06, + "loss": 0.5139, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -76.92799377441406, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -76.27699279785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2427994012832642, + "rewards_train/margins": -0.06510007381439209, + "rewards_train/rejected": -1.177699327468872, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -4.742373466491699, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -8.861562728881836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06486234813928604, + "rewards_train/margins": 0.3431689366698265, + "rewards_train/rejected": -0.40803128480911255, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -6.718696594238281, + "logps_train/ref_chosen": -1.15625, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -30.047624588012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5562446713447571, + "rewards_train/margins": 1.0797677636146545, + "rewards_train/rejected": -1.6360124349594116, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -147.73756408691406, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -163.18655395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4737564027309418, + "rewards_train/margins": 3.794898897409439, + "rewards_train/rejected": -4.268655300140381, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -62.136390686035156, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -125.45315551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9886391162872314, + "rewards_train/margins": 0.706676721572876, + "rewards_train/rejected": -4.695315837860107, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -173.55978393554688, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -235.36085510253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9559783935546875, + "rewards_train/margins": 4.780107021331787, + "rewards_train/rejected": -7.736085414886475, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -17.798606872558594, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -48.692256927490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8798606991767883, + "rewards_train/margins": 1.676865041255951, + "rewards_train/rejected": -2.5567257404327393, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -45.40503692626953, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -26.75, + "logps_train/rejected": -95.29434204101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5467536449432373, + "rewards_train/margins": 3.30768084526062, + "rewards_train/rejected": -6.854434490203857, + "step": 1276 + }, + { + "epoch": 0.36, + "logps_train/chosen": -69.81415557861328, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -85.062744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13141556084156036, + "rewards_train/margins": 1.2748588770627975, + "rewards_train/rejected": -1.406274437904358, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -24.368284225463867, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -63.25945281982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6368284225463867, + "rewards_train/margins": 3.189116954803467, + "rewards_train/rejected": -3.8259453773498535, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -141.07925415039062, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -227.81121826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6579254865646362, + "rewards_train/margins": 4.623196244239807, + "rewards_train/rejected": -6.281121730804443, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -25.90292739868164, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -29.89011573791504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.734042763710022, + "rewards_train/margins": 0.24871885776519775, + "rewards_train/rejected": -1.9827616214752197, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -19.793848037719727, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -29.262420654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5075098276138306, + "rewards_train/margins": 0.7562323808670044, + "rewards_train/rejected": -2.263742208480835, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -11.885347366333008, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -41.22245407104492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6322847604751587, + "rewards_train/margins": 1.8399606943130493, + "rewards_train/rejected": -2.472245454788208, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -197.15066528320312, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -164.96923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.715066432952881, + "rewards_train/margins": 0.2818574905395508, + "rewards_train/rejected": -4.996923923492432, + "step": 1277 + }, + { + "epoch": 0.36, + "logps_train/chosen": -238.76951599121094, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -164.02883911132812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.376951694488525, + "rewards_train/margins": -2.0740675926208496, + "rewards_train/rejected": -5.302884101867676, + "step": 1277 + }, + { + "epoch": 0.36, + "learning_rate": 1.045619928265952e-06, + "loss": 0.397, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -37.026832580566406, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -42.454185485839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3151832818984985, + "rewards_train/margins": 0.40523529052734375, + "rewards_train/rejected": -1.7204185724258423, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -85.80450439453125, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -95.349365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.280450463294983, + "rewards_train/margins": 1.354486107826233, + "rewards_train/rejected": -2.634936571121216, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -12.21461009979248, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -3.328125, + "logps_train/rejected": -7.328236103057861, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6245860457420349, + "rewards_train/margins": -0.22457492351531982, + "rewards_train/rejected": -0.4000111222267151, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -19.494489669799805, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -22.393888473510742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9369489550590515, + "rewards_train/margins": 0.9930649399757385, + "rewards_train/rejected": -1.93001389503479, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -117.11607360839844, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -188.88192749023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5116073489189148, + "rewards_train/margins": 5.476585686206818, + "rewards_train/rejected": -5.988193035125732, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -52.00624084472656, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -79.7867431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47562408447265625, + "rewards_train/margins": 1.9780502319335938, + "rewards_train/rejected": -2.45367431640625, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -129.03915405273438, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -129.75357055664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9039154052734375, + "rewards_train/margins": 0.571441650390625, + "rewards_train/rejected": -2.4753570556640625, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -67.5924072265625, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -115.40965270996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0092408657073975, + "rewards_train/margins": 3.5567243099212646, + "rewards_train/rejected": -5.565965175628662, + "step": 1278 + }, + { + "epoch": 0.36, + "logps_train/chosen": -31.930383682250977, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -26.061281204223633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.049288511276245, + "rewards_train/margins": -0.28691041469573975, + "rewards_train/rejected": -1.7623780965805054, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -18.284881591796875, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -21.969303131103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3097381591796875, + "rewards_train/margins": 0.23719215393066406, + "rewards_train/rejected": -1.5469303131103516, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -93.39301300048828, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -49.02424621582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8393012881278992, + "rewards_train/margins": 2.5006232857704163, + "rewards_train/rejected": -3.3399245738983154, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -22.012039184570312, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -32.80459976196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2387039214372635, + "rewards_train/margins": 1.1417561024427414, + "rewards_train/rejected": -1.3804600238800049, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -164.90802001953125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -273.0753173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9908020496368408, + "rewards_train/margins": 7.916729688644409, + "rewards_train/rejected": -9.90753173828125, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -17.953353881835938, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -15.888625144958496, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1484603881835938, + "rewards_train/margins": -0.03147280216217041, + "rewards_train/rejected": -1.1169875860214233, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -86.05850219726562, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -182.77487182617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8058502674102783, + "rewards_train/margins": 3.221637010574341, + "rewards_train/rejected": -6.027487277984619, + "step": 1279 + }, + { + "epoch": 0.36, + "logps_train/chosen": -332.5284729003906, + "logps_train/ref_chosen": -237.0, + "logps_train/ref_rejected": -258.0, + "logps_train/rejected": -354.5049743652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.552847862243652, + "rewards_train/margins": 0.09764957427978516, + "rewards_train/rejected": -9.650497436523438, + "step": 1279 + }, + { + "epoch": 0.36, + "learning_rate": 1.0429769743605405e-06, + "loss": 0.3534, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -150.27130126953125, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -211.43930053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.577130079269409, + "rewards_train/margins": 4.966799974441528, + "rewards_train/rejected": -8.543930053710938, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -23.537242889404297, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -58.01446533203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7912243604660034, + "rewards_train/margins": -0.1647777557373047, + "rewards_train/rejected": -1.6264466047286987, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -22.309776306152344, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -39.27952194213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0684776306152344, + "rewards_train/margins": 2.1750996112823486, + "rewards_train/rejected": -3.243577241897583, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -66.32569885253906, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -123.38093566894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8825699090957642, + "rewards_train/margins": 1.155523657798767, + "rewards_train/rejected": -2.0380935668945312, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -4.567263126373291, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -0.96875, + "logps_train/rejected": -2.7024147510528564, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21297632157802582, + "rewards_train/margins": -0.03960984945297241, + "rewards_train/rejected": -0.1733664721250534, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -95.32943725585938, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -127.43801879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1829437017440796, + "rewards_train/margins": 1.7108582258224487, + "rewards_train/rejected": -2.8938019275665283, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -26.3409366607666, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -38.10075378417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4778436422348022, + "rewards_train/margins": 1.3509818315505981, + "rewards_train/rejected": -2.8288254737854004, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -73.43791198730469, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -71.31117248535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1937911957502365, + "rewards_train/margins": 0.13732604682445526, + "rewards_train/rejected": -0.3311172425746918, + "step": 1280 + }, + { + "epoch": 0.36, + "logps_train/chosen": -19.37253189086914, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -17.855863571166992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8747531771659851, + "rewards_train/margins": 0.13270825147628784, + "rewards_train/rejected": -1.007461428642273, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -122.35791015625, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -177.04244995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.985791027545929, + "rewards_train/margins": 5.618454158306122, + "rewards_train/rejected": -6.604245185852051, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -4.804506778717041, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -14.644364356994629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2038881778717041, + "rewards_train/margins": 0.6574232578277588, + "rewards_train/rejected": -0.8613114356994629, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -167.93280029296875, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -147.04962158203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7932801246643066, + "rewards_train/margins": -0.18831801414489746, + "rewards_train/rejected": -3.604962110519409, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -26.637958526611328, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -32.62385177612305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9356708526611328, + "rewards_train/margins": 0.7079644203186035, + "rewards_train/rejected": -2.6436352729797363, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -13.371722221374512, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -14.935500144958496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08157777786254883, + "rewards_train/margins": 1.3391903638839722, + "rewards_train/rejected": -1.2576125860214233, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -98.88412475585938, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -198.1072235107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4384124279022217, + "rewards_train/margins": 8.322309732437134, + "rewards_train/rejected": -10.760722160339355, + "step": 1281 + }, + { + "epoch": 0.36, + "logps_train/chosen": -197.89645385742188, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -188.6361083984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.239645957946777, + "rewards_train/margins": -1.2760348320007324, + "rewards_train/rejected": -7.963611125946045, + "step": 1281 + }, + { + "epoch": 0.36, + "learning_rate": 1.0403337196618748e-06, + "loss": 0.4309, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -151.1964111328125, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -151.74830627441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4196411073207855, + "rewards_train/margins": 0.05518952012062073, + "rewards_train/rejected": -0.47483062744140625, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -92.06689453125, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -114.90323638916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.606689453125, + "rewards_train/margins": 0.8336342573165894, + "rewards_train/rejected": -1.4403237104415894, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -43.88856506347656, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -32.54569625854492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9451065063476562, + "rewards_train/margins": -1.6280368566513062, + "rewards_train/rejected": -1.31706964969635, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -33.50326156616211, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -28.709604263305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5128262042999268, + "rewards_train/margins": 0.7331342697143555, + "rewards_train/rejected": -2.2459604740142822, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -30.151540756225586, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -0.82421875, + "logps_train/rejected": -19.544095993041992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4598459303379059, + "rewards_train/margins": 2.331833630800247, + "rewards_train/rejected": -1.8719877004623413, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -87.52154541015625, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -107.91952514648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.052154541015625, + "rewards_train/margins": 0.0897979736328125, + "rewards_train/rejected": -1.1419525146484375, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -15.6254243850708, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -1.5703125, + "logps_train/rejected": -19.758277893066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5875424742698669, + "rewards_train/margins": 1.2312540411949158, + "rewards_train/rejected": -1.8187965154647827, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -38.664451599121094, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -28.20013427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.153945207595825, + "rewards_train/margins": 0.3051307201385498, + "rewards_train/rejected": -2.459075927734375, + "step": 1282 + }, + { + "epoch": 0.36, + "logps_train/chosen": -29.2839298248291, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -43.535213470458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4408930540084839, + "rewards_train/margins": 0.8126283884048462, + "rewards_train/rejected": -2.25352144241333, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -108.01252746582031, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -188.4440460205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8012527823448181, + "rewards_train/margins": 5.143151819705963, + "rewards_train/rejected": -5.944404602050781, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -1.0505847930908203, + "logps_train/ref_chosen": -0.189453125, + "logps_train/ref_rejected": -0.189453125, + "logps_train/rejected": -1.0498368740081787, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08611316978931427, + "rewards_train/margins": -7.479637861251831e-05, + "rewards_train/rejected": -0.08603837341070175, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -92.95587921142578, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -94.62313842773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0455880165100098, + "rewards_train/margins": 0.5167257785797119, + "rewards_train/rejected": -2.5623137950897217, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -1.345989465713501, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -5.43669319152832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003682303475216031, + "rewards_train/margins": 0.28328912262804806, + "rewards_train/rejected": -0.27960681915283203, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -28.330821990966797, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -45.27853012084961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6955822706222534, + "rewards_train/margins": 0.3322707414627075, + "rewards_train/rejected": -2.027853012084961, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -189.37115478515625, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -151.97705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.137115478515625, + "rewards_train/margins": 1.260589599609375, + "rewards_train/rejected": -4.397705078125, + "step": 1283 + }, + { + "epoch": 0.36, + "logps_train/chosen": -66.81901550292969, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -65.57414245605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.131901502609253, + "rewards_train/margins": 0.5630128383636475, + "rewards_train/rejected": -3.6949143409729004, + "step": 1283 + }, + { + "epoch": 0.36, + "learning_rate": 1.0376901826699347e-06, + "loss": 0.5071, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -102.77638244628906, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -120.24656677246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3776382207870483, + "rewards_train/margins": -0.20298147201538086, + "rewards_train/rejected": -1.1746567487716675, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -76.75855255126953, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -191.81858825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3508552312850952, + "rewards_train/margins": 7.431004166603088, + "rewards_train/rejected": -8.781859397888184, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -82.35037994384766, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -135.03375244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4350379705429077, + "rewards_train/margins": 3.1683374643325806, + "rewards_train/rejected": -4.603375434875488, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -22.059856414794922, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -60.96276092529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.243485689163208, + "rewards_train/margins": 2.602790355682373, + "rewards_train/rejected": -3.846276044845581, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -21.624454498291016, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -51.364013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19994544982910156, + "rewards_train/margins": 2.7364559173583984, + "rewards_train/rejected": -2.9364013671875, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -114.28709411621094, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -160.2220458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7287094593048096, + "rewards_train/margins": 3.6434953212738037, + "rewards_train/rejected": -6.372204780578613, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -102.63475036621094, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -197.384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.213474988937378, + "rewards_train/margins": 2.925001859664917, + "rewards_train/rejected": -6.138476848602295, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -26.475309371948242, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -38.77220916748047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3162809610366821, + "rewards_train/margins": -0.36406004428863525, + "rewards_train/rejected": -0.9522209167480469, + "step": 1284 + }, + { + "epoch": 0.36, + "logps_train/chosen": -102.67237854003906, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -123.86328887939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3672378063201904, + "rewards_train/margins": 0.41909122467041016, + "rewards_train/rejected": -2.7863290309906006, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -40.38797378540039, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -35.25315475463867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.326297402381897, + "rewards_train/margins": 0.792768120765686, + "rewards_train/rejected": -2.119065523147583, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -19.500001907348633, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -36.50250244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9250001907348633, + "rewards_train/margins": 0.8002500534057617, + "rewards_train/rejected": -1.725250244140625, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -215.64321899414062, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -219.96054077148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.8643217086792, + "rewards_train/margins": 0.9817323684692383, + "rewards_train/rejected": -10.846054077148438, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -30.90131187438965, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -1.6328125, + "logps_train/rejected": -23.248079299926758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07763119041919708, + "rewards_train/margins": 2.0838954895734787, + "rewards_train/rejected": -2.161526679992676, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -119.4062271118164, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -163.42234802246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.640622854232788, + "rewards_train/margins": 3.301612138748169, + "rewards_train/rejected": -6.942234992980957, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -185.09744262695312, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -200.62515258789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.509744167327881, + "rewards_train/margins": 0.25277137756347656, + "rewards_train/rejected": -4.762515544891357, + "step": 1285 + }, + { + "epoch": 0.36, + "logps_train/chosen": -45.51853561401367, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -48.926605224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.126853585243225, + "rewards_train/margins": 1.3783069849014282, + "rewards_train/rejected": -2.5051605701446533, + "step": 1285 + }, + { + "epoch": 0.36, + "learning_rate": 1.035046381886676e-06, + "loss": 0.2791, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -122.53524017333984, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -134.65719604492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5035240650177002, + "rewards_train/margins": 4.062195539474487, + "rewards_train/rejected": -5.5657196044921875, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -84.75932312011719, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -146.84182739257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7259323596954346, + "rewards_train/margins": 2.9582502841949463, + "rewards_train/rejected": -4.684182643890381, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -168.5987548828125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -144.00372314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6598756313323975, + "rewards_train/margins": -0.6595032215118408, + "rewards_train/rejected": -3.0003724098205566, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -123.45555877685547, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -130.26490783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.045555830001831, + "rewards_train/margins": 0.580935001373291, + "rewards_train/rejected": -2.626490831375122, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -74.00444030761719, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -115.567138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5754441022872925, + "rewards_train/margins": 3.1812697649002075, + "rewards_train/rejected": -4.7567138671875, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -30.71523666381836, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -55.604713439941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6527737379074097, + "rewards_train/margins": 2.107697606086731, + "rewards_train/rejected": -3.7604713439941406, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -155.6501922607422, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -235.07473754882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.565019369125366, + "rewards_train/margins": 5.042454481124878, + "rewards_train/rejected": -7.607473850250244, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -27.037004470825195, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -22.707693099975586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0724503993988037, + "rewards_train/margins": -0.9891810417175293, + "rewards_train/rejected": -1.0832693576812744, + "step": 1286 + }, + { + "epoch": 0.36, + "logps_train/chosen": -60.96886444091797, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -82.70755004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20311355590820312, + "rewards_train/margins": 1.0238685607910156, + "rewards_train/rejected": -0.8207550048828125, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -148.5886993408203, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -185.639892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9588699340820312, + "rewards_train/margins": 4.505119323730469, + "rewards_train/rejected": -6.4639892578125, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -22.324050903320312, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -116.81375122070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7511550784111023, + "rewards_train/margins": 2.08022004365921, + "rewards_train/rejected": -2.8313751220703125, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -21.291536331176758, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -82.12358093261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6416536569595337, + "rewards_train/margins": 2.1457043886184692, + "rewards_train/rejected": -2.787358045578003, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -11.72176742553711, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -25.639408111572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3590732514858246, + "rewards_train/margins": 1.5605140626430511, + "rewards_train/rejected": -1.2014408111572266, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -74.8177261352539, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -101.39421081542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08177261799573898, + "rewards_train/margins": 2.1576485112309456, + "rewards_train/rejected": -2.2394211292266846, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -139.65676879882812, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -178.6489715576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.71567702293396, + "rewards_train/margins": 5.849220514297485, + "rewards_train/rejected": -9.564897537231445, + "step": 1287 + }, + { + "epoch": 0.36, + "logps_train/chosen": -215.93972778320312, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -238.51754760742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.893972873687744, + "rewards_train/margins": 1.3577818870544434, + "rewards_train/rejected": -8.251754760742188, + "step": 1287 + }, + { + "epoch": 0.36, + "learning_rate": 1.0324023358159025e-06, + "loss": 0.2583, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -31.779682159423828, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -47.914329528808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9779682159423828, + "rewards_train/margins": 2.2009646892547607, + "rewards_train/rejected": -3.1789329051971436, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -17.41617202758789, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -26.61800765991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.872867226600647, + "rewards_train/margins": 0.5889335870742798, + "rewards_train/rejected": -1.4618008136749268, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -94.15533447265625, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -203.08456420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.765533447265625, + "rewards_train/margins": 1.2429230213165283, + "rewards_train/rejected": -2.0084564685821533, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -38.624881744384766, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -23.372875213623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11248817294836044, + "rewards_train/margins": 0.7310493364930153, + "rewards_train/rejected": -0.8435375094413757, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -27.479896545410156, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -49.8017463684082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2792396545410156, + "rewards_train/margins": 1.9009349346160889, + "rewards_train/rejected": -3.1801745891571045, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -41.198699951171875, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -56.439083099365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5573700666427612, + "rewards_train/margins": 0.5865381956100464, + "rewards_train/rejected": -2.1439082622528076, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -196.07498168945312, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -143.72320556640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.507498264312744, + "rewards_train/margins": -2.1351777017116547, + "rewards_train/rejected": -0.3723205626010895, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -83.49909973144531, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -90.4615707397461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39990997314453125, + "rewards_train/margins": 0.9462471008300781, + "rewards_train/rejected": -1.3461570739746094, + "step": 1288 + }, + { + "epoch": 0.36, + "logps_train/chosen": -90.13548278808594, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -52.38920211791992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23645173013210297, + "rewards_train/margins": 0.4003719389438629, + "rewards_train/rejected": -0.16392020881175995, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -75.19588470458984, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -86.92164611816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5695884823799133, + "rewards_train/margins": -0.1774238646030426, + "rewards_train/rejected": -0.3921646177768707, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -11.735273361206055, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -35.50347900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8016523718833923, + "rewards_train/margins": 1.3486955761909485, + "rewards_train/rejected": -2.150347948074341, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -40.31881332397461, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -109.9009017944336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0568814277648926, + "rewards_train/margins": -0.6167912483215332, + "rewards_train/rejected": -1.4400901794433594, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -40.42472839355469, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -37.05784225463867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2299728393554688, + "rewards_train/margins": 0.7101864814758301, + "rewards_train/rejected": -2.940159320831299, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -12.178300857543945, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -17.587934494018555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04908008500933647, + "rewards_train/margins": 1.1440884359180927, + "rewards_train/rejected": -1.1931685209274292, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -112.37265014648438, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -152.80465698242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1372649669647217, + "rewards_train/margins": 3.093200922012329, + "rewards_train/rejected": -5.230465888977051, + "step": 1289 + }, + { + "epoch": 0.36, + "logps_train/chosen": -25.495433807373047, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -51.172576904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8995434045791626, + "rewards_train/margins": 1.4677144289016724, + "rewards_train/rejected": -3.367257833480835, + "step": 1289 + }, + { + "epoch": 0.36, + "learning_rate": 1.0297580629631324e-06, + "loss": 0.492, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -213.85150146484375, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -205.39060974121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.585150241851807, + "rewards_train/margins": -1.1460890769958496, + "rewards_train/rejected": -4.439061164855957, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -18.48981475830078, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -30.84988021850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2802314758300781, + "rewards_train/margins": 1.0203816890716553, + "rewards_train/rejected": -2.3006131649017334, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -112.0787124633789, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -81.34341430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6078712940216064, + "rewards_train/margins": 0.15147018432617188, + "rewards_train/rejected": -2.7593414783477783, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -5.635896682739258, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -10.773337364196777, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3589021861553192, + "rewards_train/margins": 0.29030653834342957, + "rewards_train/rejected": -0.6492087244987488, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -109.33419799804688, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -117.38602447509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6834198236465454, + "rewards_train/margins": 0.10518264770507812, + "rewards_train/rejected": -1.7886024713516235, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -166.44439697265625, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -238.2401123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.944439649581909, + "rewards_train/margins": 4.579571485519409, + "rewards_train/rejected": -7.524011135101318, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -15.243498802185059, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -45.4381217956543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26184988021850586, + "rewards_train/margins": 3.6413373947143555, + "rewards_train/rejected": -3.9031872749328613, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -67.1246337890625, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -138.86392211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9874634146690369, + "rewards_train/margins": 4.098928987979889, + "rewards_train/rejected": -5.086392402648926, + "step": 1290 + }, + { + "epoch": 0.36, + "logps_train/chosen": -24.70366096496582, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -82.7104721069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.357866108417511, + "rewards_train/margins": 1.1131811738014221, + "rewards_train/rejected": -1.471047282218933, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -47.77352523803711, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -73.57707214355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5523525476455688, + "rewards_train/margins": 0.7303546667098999, + "rewards_train/rejected": -1.2827072143554688, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -73.34648132324219, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -189.45986938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4346481263637543, + "rewards_train/margins": 8.911339193582535, + "rewards_train/rejected": -9.345987319946289, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -51.90422058105469, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -71.75411224365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3654220700263977, + "rewards_train/margins": 2.0599891543388367, + "rewards_train/rejected": -2.4254112243652344, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -118.31124877929688, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -70.84608459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9311249256134033, + "rewards_train/margins": 0.5034835338592529, + "rewards_train/rejected": -3.4346084594726562, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -9.977557182312012, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -28.567975997924805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6180682182312012, + "rewards_train/margins": 0.026229381561279297, + "rewards_train/rejected": -0.6442975997924805, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -77.43964385986328, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -146.17430114746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7939643859863281, + "rewards_train/margins": 2.023465871810913, + "rewards_train/rejected": -2.817430257797241, + "step": 1291 + }, + { + "epoch": 0.36, + "logps_train/chosen": -129.66983032226562, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -154.79275512695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9669830799102783, + "rewards_train/margins": 2.5122926235198975, + "rewards_train/rejected": -4.479275703430176, + "step": 1291 + }, + { + "epoch": 0.36, + "learning_rate": 1.0271135818354721e-06, + "loss": 0.3601, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -244.2628631591797, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -259.9845275878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.7262864112854, + "rewards_train/margins": 0.6721663475036621, + "rewards_train/rejected": -6.3984527587890625, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -22.080013275146484, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -31.414588928222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3580013513565063, + "rewards_train/margins": -0.05404245853424072, + "rewards_train/rejected": -1.3039588928222656, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -138.76483154296875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -149.8516845703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.226483106613159, + "rewards_train/margins": -0.3413146734237671, + "rewards_train/rejected": -1.885168433189392, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -73.4728775024414, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -47.11920928955078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9972877502441406, + "rewards_train/margins": -0.5978667736053467, + "rewards_train/rejected": -2.399420976638794, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -25.493112564086914, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -29.586868286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7618112564086914, + "rewards_train/margins": 0.7875006198883057, + "rewards_train/rejected": -2.549311876296997, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -3.0331428050994873, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -10.802863121032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10331428050994873, + "rewards_train/margins": 0.3769720494747162, + "rewards_train/rejected": -0.4802863299846649, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -139.55496215820312, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -158.9415740966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9554962515830994, + "rewards_train/margins": 0.1386612057685852, + "rewards_train/rejected": -1.0941574573516846, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -14.08240795135498, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -54.965171813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0207408666610718, + "rewards_train/margins": 2.57577645778656, + "rewards_train/rejected": -3.596517324447632, + "step": 1292 + }, + { + "epoch": 0.36, + "logps_train/chosen": -57.52729797363281, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -45.221187591552734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.4339799880981445, + "rewards_train/margins": -0.46186113357543945, + "rewards_train/rejected": -3.972118854522705, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -46.087982177734375, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -42.842918395996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9087982177734375, + "rewards_train/margins": 0.981743574142456, + "rewards_train/rejected": -2.8905417919158936, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -16.764480590820312, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -50.219120025634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22644805908203125, + "rewards_train/margins": 1.8829638957977295, + "rewards_train/rejected": -2.1094119548797607, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -85.03668212890625, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -41.205230712890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.053668260574341, + "rewards_train/margins": -0.8831450939178467, + "rewards_train/rejected": -2.170523166656494, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -29.25110626220703, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -25.957576751708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10011062771081924, + "rewards_train/margins": 0.44564708322286606, + "rewards_train/rejected": -0.5457577109336853, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -6.694124698638916, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -22.480117797851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08371253311634064, + "rewards_train/margins": 0.14422431215643883, + "rewards_train/rejected": -0.06051177904009819, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -56.77522277832031, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -36.99829864501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.527522325515747, + "rewards_train/margins": 0.23480761051177979, + "rewards_train/rejected": -1.7623299360275269, + "step": 1293 + }, + { + "epoch": 0.36, + "logps_train/chosen": -178.6077880859375, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -195.69747924804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.560778617858887, + "rewards_train/margins": 2.008969306945801, + "rewards_train/rejected": -10.569747924804688, + "step": 1293 + }, + { + "epoch": 0.36, + "learning_rate": 1.0244689109414862e-06, + "loss": 0.5695, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -104.28813934326172, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -167.5925750732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8288139700889587, + "rewards_train/margins": 5.330443441867828, + "rewards_train/rejected": -6.159257411956787, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -251.06040954589844, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -245.97373962402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.506041526794434, + "rewards_train/margins": 1.2913322448730469, + "rewards_train/rejected": -10.79737377166748, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -11.32738971710205, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -28.688859939575195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8046140074729919, + "rewards_train/margins": 1.2330220341682434, + "rewards_train/rejected": -2.0376360416412354, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -158.5931854248047, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -190.25616455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.159318447113037, + "rewards_train/margins": 3.5162978172302246, + "rewards_train/rejected": -8.675616264343262, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -134.66041564941406, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -44.81126403808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.966041564941406, + "rewards_train/margins": -3.2349151372909546, + "rewards_train/rejected": -1.7311264276504517, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -169.249267578125, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -147.51918029785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.474926948547363, + "rewards_train/margins": -1.5730087757110596, + "rewards_train/rejected": -2.9019181728363037, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -15.088251113891602, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -14.834599494934082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0244501829147339, + "rewards_train/margins": 0.012134790420532227, + "rewards_train/rejected": -1.0365849733352661, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -68.85614776611328, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -195.71994018554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.710614800453186, + "rewards_train/margins": 8.761379599571228, + "rewards_train/rejected": -9.471994400024414, + "step": 1294 + }, + { + "epoch": 0.36, + "logps_train/chosen": -35.74896240234375, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -28.511093139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.274896264076233, + "rewards_train/margins": 0.779338002204895, + "rewards_train/rejected": -2.054234266281128, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -121.49612426757812, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -233.29824829101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3503875732421875, + "rewards_train/margins": 6.480212688446045, + "rewards_train/rejected": -6.129825115203857, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -24.116195678710938, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -52.579261779785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8178696036338806, + "rewards_train/margins": 2.8400567173957825, + "rewards_train/rejected": -3.657926321029663, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -7.448732376098633, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -7.6363396644592285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2698732316493988, + "rewards_train/margins": 0.07813572883605957, + "rewards_train/rejected": -0.3480089604854584, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -103.8668441772461, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -189.33795166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7866843938827515, + "rewards_train/margins": 3.747110962867737, + "rewards_train/rejected": -5.533795356750488, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -14.331936836242676, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -0.60546875, + "logps_train/rejected": -15.854715347290039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0394437313079834, + "rewards_train/margins": 0.4854809045791626, + "rewards_train/rejected": -1.524924635887146, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -81.59207916259766, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -98.92684173583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8092079162597656, + "rewards_train/margins": 0.33347630500793457, + "rewards_train/rejected": -1.1426842212677002, + "step": 1295 + }, + { + "epoch": 0.36, + "logps_train/chosen": -67.8563461303711, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -34.095054626464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1856346130371094, + "rewards_train/margins": 1.5394959449768066, + "rewards_train/rejected": -2.725130558013916, + "step": 1295 + }, + { + "epoch": 0.36, + "learning_rate": 1.0218240687910665e-06, + "loss": 0.5364, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -20.38246726989746, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -15.12521743774414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.006996750831604, + "rewards_train/margins": -0.4069749712944031, + "rewards_train/rejected": -0.6000217795372009, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -32.703399658203125, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -35.509517669677734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5015900135040283, + "rewards_train/margins": -0.05688810348510742, + "rewards_train/rejected": -2.444701910018921, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -15.974449157714844, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -21.876667022705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1411949396133423, + "rewards_train/margins": 0.015221834182739258, + "rewards_train/rejected": -1.1564167737960815, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -12.839695930480957, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -21.003131866455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47771960496902466, + "rewards_train/margins": 1.0538435578346252, + "rewards_train/rejected": -1.53156316280365, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -84.14103698730469, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -136.08731079101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8641037344932556, + "rewards_train/margins": 2.944627344608307, + "rewards_train/rejected": -3.8087310791015625, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -41.27471160888672, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -33.73735427856445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.821221113204956, + "rewards_train/margins": -0.5287356376647949, + "rewards_train/rejected": -2.292485475540161, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -72.78374481201172, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -247.23590087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3533744812011719, + "rewards_train/margins": 7.77021598815918, + "rewards_train/rejected": -9.123590469360352, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -123.25093841552734, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -135.19644165039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8250938653945923, + "rewards_train/margins": 1.244550347328186, + "rewards_train/rejected": -3.0696442127227783, + "step": 1296 + }, + { + "epoch": 0.36, + "logps_train/chosen": -22.954050064086914, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -32.51994323730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0079050064086914, + "rewards_train/margins": 0.41908931732177734, + "rewards_train/rejected": -1.4269943237304688, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -10.214005470275879, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -28.345783233642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6542130708694458, + "rewards_train/margins": 1.4834903478622437, + "rewards_train/rejected": -2.1377034187316895, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -118.00407409667969, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -167.2194061279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2004075050354004, + "rewards_train/margins": 3.071533203125, + "rewards_train/rejected": -5.2719407081604, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -130.97769165039062, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -198.42642211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7477691173553467, + "rewards_train/margins": 3.0948731899261475, + "rewards_train/rejected": -5.842642307281494, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -177.3590545654297, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -136.57376098632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.8359055519104, + "rewards_train/margins": -2.0785293579101562, + "rewards_train/rejected": -5.757376194000244, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -18.22735023498535, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -35.2288818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7539850473403931, + "rewards_train/margins": 2.3126531839370728, + "rewards_train/rejected": -3.066638231277466, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -86.63956451416016, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -41.87419891357422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5139564275741577, + "rewards_train/margins": -1.2265365421772003, + "rewards_train/rejected": -0.2874198853969574, + "step": 1297 + }, + { + "epoch": 0.36, + "logps_train/chosen": -49.875999450683594, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -90.92729187011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3625999689102173, + "rewards_train/margins": 1.530129313468933, + "rewards_train/rejected": -2.8927292823791504, + "step": 1297 + }, + { + "epoch": 0.36, + "learning_rate": 1.0191790738953036e-06, + "loss": 0.5432, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -162.9386444091797, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -223.34823608398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.893864631652832, + "rewards_train/margins": 1.4409589767456055, + "rewards_train/rejected": -6.3348236083984375, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -21.720338821411133, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -18.27943992614746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4001588821411133, + "rewards_train/margins": -0.9909648895263672, + "rewards_train/rejected": -0.4091939926147461, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -131.3712158203125, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -168.96533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4371216297149658, + "rewards_train/margins": 2.159411668777466, + "rewards_train/rejected": -3.5965332984924316, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -136.65234375, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -194.89024353027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.815234422683716, + "rewards_train/margins": 4.673789739608765, + "rewards_train/rejected": -8.48902416229248, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -23.40686798095703, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -28.434425354003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9969367980957031, + "rewards_train/margins": 1.1371307373046875, + "rewards_train/rejected": -2.1340675354003906, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -36.26384735107422, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -43.246700286865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2576348781585693, + "rewards_train/margins": 0.6857852935791016, + "rewards_train/rejected": -2.943420171737671, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -10.644722938537598, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -1.2109375, + "logps_train/rejected": -16.85079574584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6019722819328308, + "rewards_train/margins": 0.9620135426521301, + "rewards_train/rejected": -1.563985824584961, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -16.242023468017578, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -19.891023635864258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3492023944854736, + "rewards_train/margins": -0.6976000070571899, + "rewards_train/rejected": -0.6516023874282837, + "step": 1298 + }, + { + "epoch": 0.36, + "logps_train/chosen": -125.06376647949219, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -125.55523681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3563766479492188, + "rewards_train/margins": 0.04914712905883789, + "rewards_train/rejected": -2.4055237770080566, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -16.049781799316406, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -46.98207092285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3424781858921051, + "rewards_train/margins": 1.605728954076767, + "rewards_train/rejected": -1.948207139968872, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -8.938444137573242, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -1.171875, + "logps_train/rejected": -9.490426063537598, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7766569256782532, + "rewards_train/margins": 0.05519819259643555, + "rewards_train/rejected": -0.8318551182746887, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -8.341551780700684, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -7.224602699279785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2997801899909973, + "rewards_train/margins": 0.11643007397651672, + "rewards_train/rejected": -0.41621026396751404, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -32.34784698486328, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -23.998762130737305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.234784722328186, + "rewards_train/margins": 0.44009149074554443, + "rewards_train/rejected": -1.6748762130737305, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -47.94379425048828, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -42.551841735839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7568795680999756, + "rewards_train/margins": 0.3420546054840088, + "rewards_train/rejected": -3.0989341735839844, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -97.52622985839844, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -145.2179412841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6026229858398438, + "rewards_train/margins": 3.7191710472106934, + "rewards_train/rejected": -6.321794033050537, + "step": 1299 + }, + { + "epoch": 0.36, + "logps_train/chosen": -8.597332000732422, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -16.969255447387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1777667999267578, + "rewards_train/margins": 0.7809423804283142, + "rewards_train/rejected": -0.6031755805015564, + "step": 1299 + }, + { + "epoch": 0.36, + "learning_rate": 1.0165339447663586e-06, + "loss": 0.4586, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -12.782207489013672, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -18.41356658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9969707727432251, + "rewards_train/margins": 0.6178233623504639, + "rewards_train/rejected": -1.614794135093689, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -18.877717971801758, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -23.535873413085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5252718329429626, + "rewards_train/margins": 1.197065532207489, + "rewards_train/rejected": -1.7223373651504517, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -190.36618041992188, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -203.62161254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8366180658340454, + "rewards_train/margins": 3.3255432844161987, + "rewards_train/rejected": -5.162161350250244, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -9.195680618286133, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -9.687188148498535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7570680975914001, + "rewards_train/margins": -0.19772428274154663, + "rewards_train/rejected": -0.5593438148498535, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -46.348018646240234, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -90.2092056274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2098019123077393, + "rewards_train/margins": 2.5111186504364014, + "rewards_train/rejected": -4.720920562744141, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -20.46902084350586, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -12.788835525512695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7844021320343018, + "rewards_train/margins": -1.9117685854434967, + "rewards_train/rejected": 0.12736645340919495, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -137.74769592285156, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -130.05355834960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2747695446014404, + "rewards_train/margins": 1.3305864334106445, + "rewards_train/rejected": -3.605355978012085, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -116.05900573730469, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -180.2152099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3059005737304688, + "rewards_train/margins": 5.3656206130981445, + "rewards_train/rejected": -6.671521186828613, + "step": 1300 + }, + { + "epoch": 0.36, + "logps_train/chosen": -4.672304630279541, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -2.609375, + "logps_train/rejected": -4.336719512939453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.34144923090934753, + "rewards_train/margins": -0.16871477663516998, + "rewards_train/rejected": -0.17273445427417755, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -104.67268371582031, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -199.29330444335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5672683715820312, + "rewards_train/margins": 4.262062072753906, + "rewards_train/rejected": -4.8293304443359375, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -39.87990951538086, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -40.83221435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7379909753799438, + "rewards_train/margins": 1.3952304124832153, + "rewards_train/rejected": -2.133221387863159, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -122.54811096191406, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -58.94264221191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.654811143875122, + "rewards_train/margins": 0.4894530773162842, + "rewards_train/rejected": -2.1442642211914062, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -31.62649154663086, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -34.344032287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2626491785049438, + "rewards_train/margins": 1.0342541933059692, + "rewards_train/rejected": -2.296903371810913, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -16.9970645904541, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -0.9453125, + "logps_train/rejected": -0.42511987686157227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4622064530849457, + "rewards_train/margins": -0.5142257176339626, + "rewards_train/rejected": 0.05201926454901695, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -17.74428939819336, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -20.100547790527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.121303915977478, + "rewards_train/margins": -0.37374913692474365, + "rewards_train/rejected": -0.7475547790527344, + "step": 1301 + }, + { + "epoch": 0.36, + "logps_train/chosen": -25.645103454589844, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -1.3828125, + "logps_train/rejected": -16.645078659057617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6770103573799133, + "rewards_train/margins": 0.8492162823677063, + "rewards_train/rejected": -1.5262266397476196, + "step": 1301 + }, + { + "epoch": 0.36, + "learning_rate": 1.0138886999173298e-06, + "loss": 0.4955, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -113.4509048461914, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -224.68463134765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.345090627670288, + "rewards_train/margins": 6.623372316360474, + "rewards_train/rejected": -8.968462944030762, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -63.51976776123047, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -81.28994750976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.351976752281189, + "rewards_train/margins": 1.202018141746521, + "rewards_train/rejected": -2.55399489402771, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -214.59249877929688, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -219.5377960205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.159249782562256, + "rewards_train/margins": -0.20547008514404297, + "rewards_train/rejected": -6.953779697418213, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -123.15133666992188, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -199.37432861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7651336789131165, + "rewards_train/margins": 6.0722991824150085, + "rewards_train/rejected": -6.837432861328125, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -35.46417236328125, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -80.80325317382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.221417188644409, + "rewards_train/margins": 2.4089081287384033, + "rewards_train/rejected": -4.6303253173828125, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -21.66336441040039, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -22.679073333740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6350864171981812, + "rewards_train/margins": 0.2203209400177002, + "rewards_train/rejected": -1.8554073572158813, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -101.57615661621094, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -177.75146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.007615804672241, + "rewards_train/margins": 5.567530870437622, + "rewards_train/rejected": -7.575146675109863, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -111.02825927734375, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -90.19000244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9028259515762329, + "rewards_train/margins": 1.166174292564392, + "rewards_train/rejected": -2.069000244140625, + "step": 1302 + }, + { + "epoch": 0.36, + "logps_train/chosen": -23.190628051757812, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -37.71351623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6065627932548523, + "rewards_train/margins": 1.3772888779640198, + "rewards_train/rejected": -1.983851671218872, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -44.22086715698242, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -54.144203186035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32208672165870667, + "rewards_train/margins": 2.7423337399959564, + "rewards_train/rejected": -3.064420461654663, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -145.6847381591797, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -182.87152099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.868473768234253, + "rewards_train/margins": 5.318678140640259, + "rewards_train/rejected": -8.187151908874512, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -211.50350952148438, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -153.79876708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.850351095199585, + "rewards_train/margins": 2.0295255184173584, + "rewards_train/rejected": -5.879876613616943, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -90.22847747802734, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -116.28267669677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.472847819328308, + "rewards_train/margins": 0.6054199934005737, + "rewards_train/rejected": -2.078267812728882, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -39.592552185058594, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -92.8362045288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4342552423477173, + "rewards_train/margins": 4.074365496635437, + "rewards_train/rejected": -5.508620738983154, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -68.50285339355469, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -68.64952087402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5002853274345398, + "rewards_train/margins": 0.01466679573059082, + "rewards_train/rejected": -0.5149521231651306, + "step": 1303 + }, + { + "epoch": 0.36, + "logps_train/chosen": -169.2742462158203, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -265.63128662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.127424716949463, + "rewards_train/margins": 9.735703945159912, + "rewards_train/rejected": -11.863128662109375, + "step": 1303 + }, + { + "epoch": 0.36, + "learning_rate": 1.0112433578621269e-06, + "loss": 0.2232, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -30.363595962524414, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -45.97511291503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6363595724105835, + "rewards_train/margins": 0.7861517667770386, + "rewards_train/rejected": -2.422511339187622, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -89.36742401123047, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -85.3548812866211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2867424190044403, + "rewards_train/margins": 0.39874574542045593, + "rewards_train/rejected": -0.6854881644248962, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -3.2188000679016113, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -1.9609375, + "logps_train/rejected": -4.183748245239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1656300127506256, + "rewards_train/margins": 0.05665107071399689, + "rewards_train/rejected": -0.2222810834646225, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -10.945213317871094, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -45.373985290527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7867088317871094, + "rewards_train/margins": 1.363189697265625, + "rewards_train/rejected": -2.1498985290527344, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -10.514307022094727, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -21.201080322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2670557200908661, + "rewards_train/margins": 1.5858648121356964, + "rewards_train/rejected": -1.8529205322265625, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -202.97528076171875, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -243.0, + "logps_train/rejected": -335.70697021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.997528076171875, + "rewards_train/margins": 6.27316951751709, + "rewards_train/rejected": -9.270697593688965, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -207.2589111328125, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -151.22525024414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.825891017913818, + "rewards_train/margins": -0.7033658027648926, + "rewards_train/rejected": -5.122525215148926, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -39.22922897338867, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -20.766294479370117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3229229152202606, + "rewards_train/margins": 1.216206580400467, + "rewards_train/rejected": -1.5391294956207275, + "step": 1304 + }, + { + "epoch": 0.36, + "logps_train/chosen": -193.0124969482422, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -243.92465209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.701250076293945, + "rewards_train/margins": 0.2912149429321289, + "rewards_train/rejected": -8.992465019226074, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -65.66349792480469, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -93.26871490478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0413497686386108, + "rewards_train/margins": 0.8855217695236206, + "rewards_train/rejected": -1.9268715381622314, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -73.41806030273438, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -121.9347152709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.841806173324585, + "rewards_train/margins": 3.9516656398773193, + "rewards_train/rejected": -7.793471813201904, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -130.937255859375, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -145.017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.143725872039795, + "rewards_train/margins": 1.408031940460205, + "rewards_train/rejected": -6.5517578125, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -134.4333038330078, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -145.99749755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8433303833007812, + "rewards_train/margins": 0.7564194202423096, + "rewards_train/rejected": -3.599749803543091, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -126.72183227539062, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -85.0260009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7721832990646362, + "rewards_train/margins": 1.2054167985916138, + "rewards_train/rejected": -2.97760009765625, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -12.723230361938477, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -20.785377502441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5379480719566345, + "rewards_train/margins": 0.31558966636657715, + "rewards_train/rejected": -0.8535377383232117, + "step": 1305 + }, + { + "epoch": 0.36, + "logps_train/chosen": -257.5059814453125, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -209.91912841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.850598335266113, + "rewards_train/margins": 0.14131450653076172, + "rewards_train/rejected": -8.991912841796875, + "step": 1305 + }, + { + "epoch": 0.37, + "learning_rate": 1.0085979371153396e-06, + "loss": 0.3934, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -15.963369369506836, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -50.76075744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1932119131088257, + "rewards_train/margins": 1.2328637838363647, + "rewards_train/rejected": -2.4260756969451904, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -15.181833267211914, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -14.142633438110352, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0431833267211914, + "rewards_train/margins": -0.8539199829101562, + "rewards_train/rejected": -0.18926334381103516, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -28.713836669921875, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -32.23466110229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0213836431503296, + "rewards_train/margins": 1.32083261013031, + "rewards_train/rejected": -2.3422162532806396, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -108.86390686035156, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -168.00540161132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6363906860351562, + "rewards_train/margins": 1.9641494750976562, + "rewards_train/rejected": -2.6005401611328125, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -190.2774658203125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -303.9759216308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.527746677398682, + "rewards_train/margins": 10.169846057891846, + "rewards_train/rejected": -15.697592735290527, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -236.46011352539062, + "logps_train/ref_chosen": -213.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -216.90773010253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3460114002227783, + "rewards_train/margins": 3.144761800765991, + "rewards_train/rejected": -5.4907732009887695, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -132.342529296875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -211.51876831054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.2342529296875, + "rewards_train/margins": 2.2176241874694824, + "rewards_train/rejected": -6.451877117156982, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -132.0475311279297, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -233.3172607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.304753065109253, + "rewards_train/margins": 4.626972913742065, + "rewards_train/rejected": -6.931725978851318, + "step": 1306 + }, + { + "epoch": 0.37, + "logps_train/chosen": -125.6136703491211, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -121.4961166381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8613670468330383, + "rewards_train/margins": 2.538244664669037, + "rewards_train/rejected": -3.399611711502075, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -117.116455078125, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -219.56948852539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8116455078125, + "rewards_train/margins": 5.845303535461426, + "rewards_train/rejected": -7.656949043273926, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.805007934570312, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -39.63926315307617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.064875841140747, + "rewards_train/margins": -0.22594952583312988, + "rewards_train/rejected": -0.8389263153076172, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -159.14993286132812, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -161.69619750976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.714993476867676, + "rewards_train/margins": 0.30462646484375, + "rewards_train/rejected": -7.019619941711426, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -33.01451110839844, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -42.297454833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7889511585235596, + "rewards_train/margins": 1.715794324874878, + "rewards_train/rejected": -3.5047454833984375, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -75.6762924194336, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -35.90842819213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2323707640171051, + "rewards_train/margins": 3.2294637262821198, + "rewards_train/rejected": -2.9970929622650146, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -7.073748588562012, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -9.551124572753906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.39174985885620117, + "rewards_train/margins": -0.12101238965988159, + "rewards_train/rejected": -0.2707374691963196, + "step": 1307 + }, + { + "epoch": 0.37, + "logps_train/chosen": -43.51166534423828, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -37.014137268066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8886665105819702, + "rewards_train/margins": 0.2127472162246704, + "rewards_train/rejected": -2.1014137268066406, + "step": 1307 + }, + { + "epoch": 0.37, + "learning_rate": 1.0059524561921087e-06, + "loss": 0.3116, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.699398040771484, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -1.265625, + "logps_train/rejected": -5.712517738342285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4105648100376129, + "rewards_train/margins": 0.0341244637966156, + "rewards_train/rejected": -0.4446892738342285, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -135.26614379882812, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -42.545005798339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6266143321990967, + "rewards_train/margins": -1.5971137285232544, + "rewards_train/rejected": -1.0295006036758423, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -32.63942337036133, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -41.04170227050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2139424085617065, + "rewards_train/margins": 1.0277279615402222, + "rewards_train/rejected": -2.2416703701019287, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -26.97618293762207, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -28.52675437927246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0726183652877808, + "rewards_train/margins": 0.8113070726394653, + "rewards_train/rejected": -1.883925437927246, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -48.12761688232422, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -53.71068572998047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.975261688232422, + "rewards_train/margins": -1.704193115234375, + "rewards_train/rejected": -1.2710685729980469, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -18.948585510253906, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -31.755762100219727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7761085629463196, + "rewards_train/margins": 1.2057176232337952, + "rewards_train/rejected": -1.9818261861801147, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -21.766138076782227, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -35.03783416748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4766138792037964, + "rewards_train/margins": 1.3427945375442505, + "rewards_train/rejected": -2.819408416748047, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.93831729888916, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -40.38700485229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33445674180984497, + "rewards_train/margins": 1.4042437672615051, + "rewards_train/rejected": -1.73870050907135, + "step": 1308 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.635223388671875, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -1.7421875, + "logps_train/rejected": -15.63625431060791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6010223627090454, + "rewards_train/margins": 0.7883843183517456, + "rewards_train/rejected": -1.389406681060791, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -144.67828369140625, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -83.99667358398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1678284406661987, + "rewards_train/margins": -0.918161079287529, + "rewards_train/rejected": -0.24966736137866974, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -18.88176155090332, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -23.933008193969727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1600512266159058, + "rewards_train/margins": -0.2230004072189331, + "rewards_train/rejected": -0.9370508193969727, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -67.41987609863281, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -86.22063446044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7669875621795654, + "rewards_train/margins": 0.18007588386535645, + "rewards_train/rejected": -2.947063446044922, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -134.01937866210938, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -144.45755004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.2519378662109375, + "rewards_train/margins": 1.5438170433044434, + "rewards_train/rejected": -5.795754909515381, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -219.51332092285156, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -259.2481689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.651332378387451, + "rewards_train/margins": 1.4734845161437988, + "rewards_train/rejected": -6.12481689453125, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -12.571325302124023, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -43.42135238647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16963253915309906, + "rewards_train/margins": 2.0975028425455093, + "rewards_train/rejected": -2.2671353816986084, + "step": 1309 + }, + { + "epoch": 0.37, + "logps_train/chosen": -6.068260192871094, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -8.79749870300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021298980340361595, + "rewards_train/margins": 0.03229885082691908, + "rewards_train/rejected": -0.010999870486557484, + "step": 1309 + }, + { + "epoch": 0.37, + "learning_rate": 1.0033069336079952e-06, + "loss": 0.621, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -89.57005310058594, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -199.98927307128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10700531303882599, + "rewards_train/margins": 6.091922089457512, + "rewards_train/rejected": -6.198927402496338, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -169.55117797851562, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -216.84104919433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8551177978515625, + "rewards_train/margins": 3.8289871215820312, + "rewards_train/rejected": -7.684104919433594, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.934782028198242, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -30.623764038085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6622282266616821, + "rewards_train/margins": 1.2501481771469116, + "rewards_train/rejected": -1.9123764038085938, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -29.381267547607422, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -30.75798988342285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7318767309188843, + "rewards_train/margins": 0.9923597574234009, + "rewards_train/rejected": -2.724236488342285, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -99.0501480102539, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -148.05953979492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4050148129463196, + "rewards_train/margins": 2.3509392142295837, + "rewards_train/rejected": -2.7559540271759033, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -82.93999481201172, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -83.42359161376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5939995050430298, + "rewards_train/margins": 3.048359751701355, + "rewards_train/rejected": -3.6423592567443848, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -116.46003723144531, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -116.43632507324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2460037469863892, + "rewards_train/margins": 2.4976288080215454, + "rewards_train/rejected": -3.7436325550079346, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -133.61119079589844, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -149.11013793945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0111191272735596, + "rewards_train/margins": 2.499894857406616, + "rewards_train/rejected": -4.511013984680176, + "step": 1310 + }, + { + "epoch": 0.37, + "logps_train/chosen": -182.94448852539062, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -105.8467788696289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2944488525390625, + "rewards_train/margins": -1.709770917892456, + "rewards_train/rejected": -1.5846779346466064, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -72.90205383300781, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -73.03936767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2902053892612457, + "rewards_train/margins": 0.013731390237808228, + "rewards_train/rejected": -0.30393677949905396, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -238.96971130371094, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -212.47219848632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.09697151184082, + "rewards_train/margins": -2.0497517585754395, + "rewards_train/rejected": -7.047219753265381, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -317.578369140625, + "logps_train/ref_chosen": -200.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -292.95220947265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.757837295532227, + "rewards_train/margins": -0.16261577606201172, + "rewards_train/rejected": -11.595221519470215, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -140.40884399414062, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -180.7153778076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44088441133499146, + "rewards_train/margins": 4.030653655529022, + "rewards_train/rejected": -4.471538066864014, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -77.13687896728516, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -121.03874206542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0636879205703735, + "rewards_train/margins": 0.040186285972595215, + "rewards_train/rejected": -1.1038742065429688, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.964714050292969, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -30.919527053833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7214714288711548, + "rewards_train/margins": 1.8954812288284302, + "rewards_train/rejected": -2.616952657699585, + "step": 1311 + }, + { + "epoch": 0.37, + "logps_train/chosen": -148.10641479492188, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -204.53890991210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.010641574859619, + "rewards_train/margins": 6.643249988555908, + "rewards_train/rejected": -9.653891563415527, + "step": 1311 + }, + { + "epoch": 0.37, + "learning_rate": 1.0006613878788528e-06, + "loss": 0.4518, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -162.7376251220703, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -121.38680267333984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.373762607574463, + "rewards_train/margins": -2.0350823402404785, + "rewards_train/rejected": -3.3386802673339844, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -196.79666137695312, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -266.32794189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.679666042327881, + "rewards_train/margins": 2.553128719329834, + "rewards_train/rejected": -9.232794761657715, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -118.16703796386719, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -114.93145751953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4167038202285767, + "rewards_train/margins": -0.9235580563545227, + "rewards_train/rejected": -0.49314576387405396, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -21.348569869995117, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -46.75177001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47235700488090515, + "rewards_train/margins": 2.80281999707222, + "rewards_train/rejected": -3.275177001953125, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -27.691652297973633, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -39.21205139160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6254152059555054, + "rewards_train/margins": 1.6457899808883667, + "rewards_train/rejected": -3.271205186843872, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -181.22775268554688, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -207.8887481689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9227752685546875, + "rewards_train/margins": 3.6660995483398438, + "rewards_train/rejected": -5.588874816894531, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -54.30747604370117, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -30.625, + "logps_train/rejected": -76.8707275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.205747604370117, + "rewards_train/margins": 2.418825149536133, + "rewards_train/rejected": -4.62457275390625, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -88.22492980957031, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -178.8773956298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.372493028640747, + "rewards_train/margins": 6.6652467250823975, + "rewards_train/rejected": -8.037739753723145, + "step": 1312 + }, + { + "epoch": 0.37, + "logps_train/chosen": -115.04931640625, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -167.3299102783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.804931640625, + "rewards_train/margins": 4.628059387207031, + "rewards_train/rejected": -7.432991027832031, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -23.505680084228516, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -32.9495849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7130680084228516, + "rewards_train/margins": 2.2568905353546143, + "rewards_train/rejected": -2.969958543777466, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -131.4569854736328, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -173.9418182373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8956985473632812, + "rewards_train/margins": 1.1984832286834717, + "rewards_train/rejected": -3.094181776046753, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -97.7796859741211, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -209.61917114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8279685974121094, + "rewards_train/margins": 4.433948516845703, + "rewards_train/rejected": -5.2619171142578125, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -110.47786712646484, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -214.9850616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5977866649627686, + "rewards_train/margins": 7.500719308853149, + "rewards_train/rejected": -10.098505973815918, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -45.408790588378906, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -39.450408935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5283790826797485, + "rewards_train/margins": 0.5166617631912231, + "rewards_train/rejected": -2.0450408458709717, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -4.8206000328063965, + "logps_train/ref_chosen": -0.6015625, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -26.9633846282959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4219037592411041, + "rewards_train/margins": 1.93068465590477, + "rewards_train/rejected": -2.352588415145874, + "step": 1313 + }, + { + "epoch": 0.37, + "logps_train/chosen": -68.97589111328125, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -144.05197143554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8225891590118408, + "rewards_train/margins": 2.63260817527771, + "rewards_train/rejected": -4.455197334289551, + "step": 1313 + }, + { + "epoch": 0.37, + "learning_rate": 9.980158375206961e-07, + "loss": 0.306, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -97.44500732421875, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -138.0573272705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9945007562637329, + "rewards_train/margins": 3.0612319707870483, + "rewards_train/rejected": -4.055732727050781, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.063921928405762, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -11.682941436767578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5907672047615051, + "rewards_train/margins": -0.17247304320335388, + "rewards_train/rejected": -0.41829416155815125, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -38.22605895996094, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -19.627504348754883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5476058721542358, + "rewards_train/margins": -0.13485538959503174, + "rewards_train/rejected": -1.412750482559204, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -22.486351013183594, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -21.381752014160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7798851132392883, + "rewards_train/margins": 0.4332900643348694, + "rewards_train/rejected": -1.2131751775741577, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.759719848632812, + "logps_train/ref_chosen": -0.1904296875, + "logps_train/ref_rejected": -0.1904296875, + "logps_train/rejected": -9.634637832641602, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9569290280342102, + "rewards_train/margins": -0.012508213520050049, + "rewards_train/rejected": -0.9444208145141602, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -6.778745651245117, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -80.33538055419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0033754350151866674, + "rewards_train/margins": 1.4369135142769665, + "rewards_train/rejected": -1.4335380792617798, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -144.79295349121094, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -316.72027587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.479295253753662, + "rewards_train/margins": 10.3927321434021, + "rewards_train/rejected": -14.872027397155762, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.71393394470215, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -229.26014709472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35889339447021484, + "rewards_train/margins": 3.4671213626861572, + "rewards_train/rejected": -3.826014757156372, + "step": 1314 + }, + { + "epoch": 0.37, + "logps_train/chosen": -82.98283386230469, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -111.3531494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3982833623886108, + "rewards_train/margins": 0.6370316743850708, + "rewards_train/rejected": -2.0353150367736816, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -161.67724609375, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -185.74264526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.867724895477295, + "rewards_train/margins": 2.6065402030944824, + "rewards_train/rejected": -8.474265098571777, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -125.37657928466797, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -115.36434173583984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.087657928466797, + "rewards_train/margins": -0.0012238025665283203, + "rewards_train/rejected": -3.0864341259002686, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -48.66704559326172, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -54.14888000488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1167047023773193, + "rewards_train/margins": 1.5513083934783936, + "rewards_train/rejected": -4.668013095855713, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -88.42103576660156, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -1.25, + "logps_train/rejected": -18.03899383544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.392103672027588, + "rewards_train/margins": -1.713204264640808, + "rewards_train/rejected": -1.6788994073867798, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -99.6627197265625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -159.11590576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.216271996498108, + "rewards_train/margins": 4.445318579673767, + "rewards_train/rejected": -5.661590576171875, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -31.16389274597168, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -48.615264892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9038892984390259, + "rewards_train/margins": 2.401387095451355, + "rewards_train/rejected": -4.305276393890381, + "step": 1315 + }, + { + "epoch": 0.37, + "logps_train/chosen": -31.386219024658203, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -25.74056625366211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3261219263076782, + "rewards_train/margins": -0.2583153247833252, + "rewards_train/rejected": -1.067806601524353, + "step": 1315 + }, + { + "epoch": 0.37, + "learning_rate": 9.953703010495734e-07, + "loss": 0.4517, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -172.5490264892578, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -231.2916259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.0049028396606445, + "rewards_train/margins": 2.0242600440979004, + "rewards_train/rejected": -7.029162883758545, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -35.69532775878906, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -50.099449157714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0664079189300537, + "rewards_train/margins": 1.602912187576294, + "rewards_train/rejected": -4.669320106506348, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -5.073960781097412, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -5.529163837432861, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21833358705043793, + "rewards_train/margins": -0.2185422033071518, + "rewards_train/rejected": 0.0002086162567138672, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -104.5999755859375, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -49.02122116088867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.859997510910034, + "rewards_train/margins": -0.4766254425048828, + "rewards_train/rejected": -3.3833720684051514, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -91.0751953125, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -153.42926025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6575195789337158, + "rewards_train/margins": 3.0354063510894775, + "rewards_train/rejected": -4.692925930023193, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -93.46861267089844, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -165.28274536132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0031387328635901213, + "rewards_train/margins": 2.331413221312687, + "rewards_train/rejected": -2.3282744884490967, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -91.25431823730469, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -170.9152374267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4754317998886108, + "rewards_train/margins": 5.866091847419739, + "rewards_train/rejected": -7.34152364730835, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -99.93954467773438, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -229.0, + "logps_train/rejected": -281.33123779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1439545154571533, + "rewards_train/margins": 4.089169263839722, + "rewards_train/rejected": -5.233123779296875, + "step": 1316 + }, + { + "epoch": 0.37, + "logps_train/chosen": -42.90541076660156, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -110.15364074707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6155411005020142, + "rewards_train/margins": 1.3498231172561646, + "rewards_train/rejected": -2.9653642177581787, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -150.39297485351562, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -160.07150268554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.189297676086426, + "rewards_train/margins": 0.41785287857055664, + "rewards_train/rejected": -4.607150554656982, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -163.17898559570312, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -228.34246826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.467898845672607, + "rewards_train/margins": 1.9663481712341309, + "rewards_train/rejected": -8.434247016906738, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -21.594768524169922, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -25.2042236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8594768643379211, + "rewards_train/margins": 0.47344547510147095, + "rewards_train/rejected": -1.332922339439392, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -96.1695556640625, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -204.92843627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5169556140899658, + "rewards_train/margins": 3.775888204574585, + "rewards_train/rejected": -5.292843818664551, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -27.339801788330078, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -17.93276596069336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.85273015499115, + "rewards_train/margins": -0.38289105892181396, + "rewards_train/rejected": -1.469839096069336, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.785038948059082, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -29.45444107055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7160038948059082, + "rewards_train/margins": 1.004440188407898, + "rewards_train/rejected": -1.7204440832138062, + "step": 1317 + }, + { + "epoch": 0.37, + "logps_train/chosen": -24.398141860961914, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -32.498268127441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4648141860961914, + "rewards_train/margins": 0.866262674331665, + "rewards_train/rejected": -2.3310768604278564, + "step": 1317 + }, + { + "epoch": 0.37, + "learning_rate": 9.927247969814348e-07, + "loss": 0.3237, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.91120147705078, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -21.348602294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4973701536655426, + "rewards_train/margins": 0.7812400758266449, + "rewards_train/rejected": -1.2786102294921875, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -22.091426849365234, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -35.690185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05914268642663956, + "rewards_train/margins": 2.4973758682608604, + "rewards_train/rejected": -2.5565185546875, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -190.31195068359375, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -30.5, + "logps_train/rejected": -56.3575439453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.881195068359375, + "rewards_train/margins": -5.295440673828125, + "rewards_train/rejected": -2.58575439453125, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -124.07373046875, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -147.73904418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.507373034954071, + "rewards_train/margins": 5.866531670093536, + "rewards_train/rejected": -6.373904705047607, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.114459991455078, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -26.149852752685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1583210229873657, + "rewards_train/margins": 0.744164228439331, + "rewards_train/rejected": -1.9024852514266968, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -100.28343963623047, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -43.73367691040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3283441066741943, + "rewards_train/margins": 0.12627363204956055, + "rewards_train/rejected": -3.454617738723755, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -12.565332412719727, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -16.639904022216797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6065332293510437, + "rewards_train/margins": -0.08004283905029297, + "rewards_train/rejected": -0.5264903903007507, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -109.0758056640625, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -147.805419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.507580578327179, + "rewards_train/margins": 1.3729614615440369, + "rewards_train/rejected": -1.8805420398712158, + "step": 1318 + }, + { + "epoch": 0.37, + "logps_train/chosen": -65.98564910888672, + "logps_train/ref_chosen": -30.875, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -128.3172607421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5110650062561035, + "rewards_train/margins": -0.4293389320373535, + "rewards_train/rejected": -3.08172607421875, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -36.54954528808594, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -33.099021911621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0049545764923096, + "rewards_train/margins": -0.3700523376464844, + "rewards_train/rejected": -1.6349022388458252, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -5.116884708404541, + "logps_train/ref_chosen": -1.390625, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -7.828616619110107, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3726259768009186, + "rewards_train/margins": 0.08679819107055664, + "rewards_train/rejected": -0.4594241678714752, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -205.79586791992188, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -179.62811279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.979587078094482, + "rewards_train/margins": 0.4332242012023926, + "rewards_train/rejected": -6.412811279296875, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -143.687744140625, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -162.96714782714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2187745571136475, + "rewards_train/margins": 2.1279404163360596, + "rewards_train/rejected": -5.346714973449707, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -11.928163528442383, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -36.60429763793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7459413409233093, + "rewards_train/margins": 1.895738422870636, + "rewards_train/rejected": -2.6416797637939453, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -80.7761001586914, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -73.8680648803711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.377609968185425, + "rewards_train/margins": -0.11580348014831543, + "rewards_train/rejected": -3.2618064880371094, + "step": 1319 + }, + { + "epoch": 0.37, + "logps_train/chosen": -99.9941635131836, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -111.30905151367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4494163691997528, + "rewards_train/margins": 0.4314887821674347, + "rewards_train/rejected": -0.8809051513671875, + "step": 1319 + }, + { + "epoch": 0.37, + "learning_rate": 9.900793438320036e-07, + "loss": 0.7636, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -24.114463806152344, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -20.992931365966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6114463806152344, + "rewards_train/margins": 1.0159717798233032, + "rewards_train/rejected": -1.6274181604385376, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -6.239890098571777, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -8.342249870300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23648901283740997, + "rewards_train/margins": 0.060235992074012756, + "rewards_train/rejected": -0.29672500491142273, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -72.0924072265625, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -107.49398803710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.59075927734375, + "rewards_train/margins": 2.5901581048965454, + "rewards_train/rejected": -1.9993988275527954, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -96.07383728027344, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -69.18621063232422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0073837041854858, + "rewards_train/margins": -0.4387626051902771, + "rewards_train/rejected": -0.5686210989952087, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -29.897966384887695, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -23.13322639465332, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3491716384887695, + "rewards_train/margins": -1.1233489513397217, + "rewards_train/rejected": -1.2258226871490479, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -120.10279846191406, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -174.47219848632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6602798700332642, + "rewards_train/margins": 3.7369402647018433, + "rewards_train/rejected": -5.397220134735107, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -150.85195922851562, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -135.41058349609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.6851959228515625, + "rewards_train/margins": -0.5441374778747559, + "rewards_train/rejected": -4.141058444976807, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -6.48936128616333, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -22.0072021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05206112936139107, + "rewards_train/margins": 0.992409061640501, + "rewards_train/rejected": -1.044470191001892, + "step": 1320 + }, + { + "epoch": 0.37, + "logps_train/chosen": -40.29365158081055, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -49.125980377197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5668652057647705, + "rewards_train/margins": 0.14573287963867188, + "rewards_train/rejected": -1.7125980854034424, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -23.245813369750977, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -20.76303482055664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7527064085006714, + "rewards_train/margins": -0.6139029264450073, + "rewards_train/rejected": -1.138803482055664, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -40.16757583618164, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -29.173877716064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8417576551437378, + "rewards_train/margins": 0.19438016414642334, + "rewards_train/rejected": -2.036137819290161, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -183.38681030273438, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -162.2113494873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7386810779571533, + "rewards_train/margins": 3.032453775405884, + "rewards_train/rejected": -5.771134853363037, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -20.45090103149414, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -26.981590270996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.416965126991272, + "rewards_train/margins": 0.4249439239501953, + "rewards_train/rejected": -1.8419090509414673, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -8.754807472229004, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -16.579179763793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6411057710647583, + "rewards_train/margins": 0.4793121814727783, + "rewards_train/rejected": -1.1204179525375366, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -1.2125723361968994, + "logps_train/ref_chosen": -1.265625, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -4.47304630279541, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.005305266473442316, + "rewards_train/margins": -0.03801510250195861, + "rewards_train/rejected": 0.043320368975400925, + "step": 1321 + }, + { + "epoch": 0.37, + "logps_train/chosen": -44.13495635986328, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -44.45511245727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2134956121444702, + "rewards_train/margins": 2.600765585899353, + "rewards_train/rejected": -3.8142611980438232, + "step": 1321 + }, + { + "epoch": 0.37, + "learning_rate": 9.874339601166472e-07, + "loss": 0.5508, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -94.03707122802734, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -131.53317260742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3537070751190186, + "rewards_train/margins": 2.6496102809906006, + "rewards_train/rejected": -5.003317356109619, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -179.76219177246094, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -261.3333435058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.276219129562378, + "rewards_train/margins": 5.457115411758423, + "rewards_train/rejected": -8.7333345413208, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -113.16827392578125, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -151.52455139160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26682740449905396, + "rewards_train/margins": 1.4856277108192444, + "rewards_train/rejected": -1.7524551153182983, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -34.01539993286133, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -30.308521270751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1890400648117065, + "rewards_train/margins": 0.3668121099472046, + "rewards_train/rejected": -1.5558521747589111, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -72.83804321289062, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -135.3112030029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31619569659233093, + "rewards_train/margins": 2.3473159968852997, + "rewards_train/rejected": -2.0311203002929688, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -132.77598571777344, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -185.78460693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5275986194610596, + "rewards_train/margins": 6.25086236000061, + "rewards_train/rejected": -7.77846097946167, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -93.7778091430664, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -114.11802673339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.3222191035747528, + "rewards_train/margins": -0.26597824692726135, + "rewards_train/rejected": 0.5881973505020142, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.188150405883789, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -14.217493057250977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8406900763511658, + "rewards_train/margins": 0.20449680089950562, + "rewards_train/rejected": -1.0451868772506714, + "step": 1322 + }, + { + "epoch": 0.37, + "logps_train/chosen": -3.1485230922698975, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -17.862943649291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17344605922698975, + "rewards_train/margins": 0.5128483176231384, + "rewards_train/rejected": -0.6862943768501282, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -6.936097621917725, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -1.1640625, + "logps_train/rejected": -4.8215765953063965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32329726219177246, + "rewards_train/margins": 0.042454153299331665, + "rewards_train/rejected": -0.3657514154911041, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -59.69658660888672, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -59.38692855834961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.5303413271903992, + "rewards_train/margins": -0.030965805053710938, + "rewards_train/rejected": 0.5613071322441101, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -22.948772430419922, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -29.587465286254883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.55425226688385, + "rewards_train/margins": 0.2669942378997803, + "rewards_train/rejected": -1.8212465047836304, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -209.868896484375, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -204.50897216796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.986889839172363, + "rewards_train/margins": -0.985992431640625, + "rewards_train/rejected": -8.000897407531738, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -10.304247856140137, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -28.83748435974121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5522997975349426, + "rewards_train/margins": 0.08144867420196533, + "rewards_train/rejected": -0.633748471736908, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -99.33553314208984, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -106.544677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3335533142089844, + "rewards_train/margins": 0.32091450691223145, + "rewards_train/rejected": -3.654467821121216, + "step": 1323 + }, + { + "epoch": 0.37, + "logps_train/chosen": -2.198549270629883, + "logps_train/ref_chosen": -0.8515625, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -4.302767276763916, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.13469867408275604, + "rewards_train/margins": -0.07473444566130638, + "rewards_train/rejected": -0.05996422842144966, + "step": 1323 + }, + { + "epoch": 0.37, + "learning_rate": 9.847886643502475e-07, + "loss": 0.4987, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -17.01656150817871, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -26.778757095336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.026656150817871094, + "rewards_train/margins": 1.1137195825576782, + "rewards_train/rejected": -1.1403757333755493, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -12.11790657043457, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -31.987258911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06804066151380539, + "rewards_train/margins": 1.9431852772831917, + "rewards_train/rejected": -2.011225938796997, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -13.317906379699707, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -8.851503372192383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7255406379699707, + "rewards_train/margins": -0.15445280075073242, + "rewards_train/rejected": -0.5710878372192383, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -106.70748901367188, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -153.65570068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07925110310316086, + "rewards_train/margins": 4.644821457564831, + "rewards_train/rejected": -4.56557035446167, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -12.83179759979248, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -34.62077331542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0050548315048218, + "rewards_train/margins": 1.257022500038147, + "rewards_train/rejected": -2.2620773315429688, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -32.43353271484375, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -86.48272705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.555853247642517, + "rewards_train/margins": 4.954919457435608, + "rewards_train/rejected": -6.510772705078125, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -145.2164306640625, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -101.96812438964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.271643161773682, + "rewards_train/margins": -2.3998305797576904, + "rewards_train/rejected": -3.871812582015991, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -2.3228511810302734, + "logps_train/ref_chosen": -1.03125, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -8.291705131530762, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12916012108325958, + "rewards_train/margins": 0.5351666659116745, + "rewards_train/rejected": -0.6643267869949341, + "step": 1324 + }, + { + "epoch": 0.37, + "logps_train/chosen": -164.205322265625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -168.44241333007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.6705322265625, + "rewards_train/margins": 0.02370929718017578, + "rewards_train/rejected": -4.694241523742676, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -166.7667694091797, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -189.34222412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.976676940917969, + "rewards_train/margins": 1.8575453758239746, + "rewards_train/rejected": -7.834222316741943, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -59.75681686401367, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -58.77863693237305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.925681710243225, + "rewards_train/margins": -0.09781801700592041, + "rewards_train/rejected": -1.8278636932373047, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -238.68479919433594, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -238.89865112304688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.668479919433594, + "rewards_train/margins": -0.0786142349243164, + "rewards_train/rejected": -10.589865684509277, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.676126480102539, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -25.881269454956055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42386266589164734, + "rewards_train/margins": 1.479889303445816, + "rewards_train/rejected": -1.9037519693374634, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -9.898269653320312, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -5.135369777679443, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6132645010948181, + "rewards_train/margins": -0.4841025173664093, + "rewards_train/rejected": -0.1291619837284088, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -47.73868942260742, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -26.455284118652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.076993942260742, + "rewards_train/margins": -2.081465482711792, + "rewards_train/rejected": -1.9955284595489502, + "step": 1325 + }, + { + "epoch": 0.37, + "logps_train/chosen": -5.917710781097412, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -5.940953254699707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01052107848227024, + "rewards_train/margins": 0.0023242468014359474, + "rewards_train/rejected": -0.012845325283706188, + "step": 1325 + }, + { + "epoch": 0.37, + "learning_rate": 9.821434750470696e-07, + "loss": 0.6731, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -215.82901000976562, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -191.68539428710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.982901096343994, + "rewards_train/margins": 2.8356385231018066, + "rewards_train/rejected": -8.8185396194458, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -15.526708602905273, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -18.856754302978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7089208960533142, + "rewards_train/margins": 0.20175457000732422, + "rewards_train/rejected": -0.9106754660606384, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -6.486968994140625, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -26.890405654907227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17057190835475922, + "rewards_train/margins": 1.5997187048196793, + "rewards_train/rejected": -1.7702906131744385, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -118.14254760742188, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -119.06861114501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6142547130584717, + "rewards_train/margins": 0.0926065444946289, + "rewards_train/rejected": -2.7068612575531006, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -27.836549758911133, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -23.660961151123047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7649049758911133, + "rewards_train/margins": -0.2175588607788086, + "rewards_train/rejected": -1.5473461151123047, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.722371101379395, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -0.486328125, + "logps_train/rejected": -18.429950714111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.44026288390159607, + "rewards_train/margins": 2.2346251904964447, + "rewards_train/rejected": -1.7943623065948486, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -223.1118621826172, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -244.06651306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.111186027526855, + "rewards_train/margins": 3.395465850830078, + "rewards_train/rejected": -12.506651878356934, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -97.18974304199219, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -144.91201782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.068974256515503, + "rewards_train/margins": 2.2722275257110596, + "rewards_train/rejected": -4.3412017822265625, + "step": 1326 + }, + { + "epoch": 0.37, + "logps_train/chosen": -162.3424530029297, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -170.33157348632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.234245300292969, + "rewards_train/margins": 1.2489123344421387, + "rewards_train/rejected": -6.483157634735107, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -43.92205810546875, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -15.436774253845215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.692205786705017, + "rewards_train/margins": -0.7922783493995667, + "rewards_train/rejected": -0.8999274373054504, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -37.179134368896484, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -51.84505844116211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4429134130477905, + "rewards_train/margins": 0.9540923833847046, + "rewards_train/rejected": -2.397005796432495, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -44.78523635864258, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -65.63455963134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3035236597061157, + "rewards_train/margins": 0.7599323987960815, + "rewards_train/rejected": -2.0634560585021973, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -106.28136444091797, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -157.82933044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5781364440917969, + "rewards_train/margins": 1.3047966957092285, + "rewards_train/rejected": -2.8829331398010254, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -124.88302612304688, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -95.57444763183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0383026599884033, + "rewards_train/margins": 1.219142198562622, + "rewards_train/rejected": -2.2574448585510254, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -4.48952054977417, + "logps_train/ref_chosen": -0.50390625, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -11.042104721069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3985614478588104, + "rewards_train/margins": 0.3212740123271942, + "rewards_train/rejected": -0.7198354601860046, + "step": 1327 + }, + { + "epoch": 0.37, + "logps_train/chosen": -189.0356903076172, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -245.32595825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.403569221496582, + "rewards_train/margins": 2.2290267944335938, + "rewards_train/rejected": -7.632596015930176, + "step": 1327 + }, + { + "epoch": 0.37, + "learning_rate": 9.794984107206335e-07, + "loss": 0.3628, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -140.56942749023438, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -233.8641815185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.356942892074585, + "rewards_train/margins": 4.029475450515747, + "rewards_train/rejected": -6.386418342590332, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -4.413169860839844, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -9.233083724975586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.046004485338926315, + "rewards_train/margins": 0.6148038990795612, + "rewards_train/rejected": -0.6608083844184875, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -13.972066879272461, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -21.35607147216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.944081723690033, + "rewards_train/margins": 0.010275423526763916, + "rewards_train/rejected": -0.9543571472167969, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -2.292898178100586, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -4.936765670776367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005085182376205921, + "rewards_train/margins": 0.109699253924191, + "rewards_train/rejected": -0.10461407154798508, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -34.30541229248047, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -66.03004455566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7305412292480469, + "rewards_train/margins": 3.159963369369507, + "rewards_train/rejected": -3.8905045986175537, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -39.18964767456055, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -98.41442108154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5689647793769836, + "rewards_train/margins": 2.922477424144745, + "rewards_train/rejected": -3.4914422035217285, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -104.35652160644531, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -160.6800537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7856521606445312, + "rewards_train/margins": 5.482353210449219, + "rewards_train/rejected": -7.26800537109375, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -129.1599578857422, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -59.25, + "logps_train/rejected": -79.15157318115234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4159958362579346, + "rewards_train/margins": -1.4258384704589844, + "rewards_train/rejected": -1.9901573657989502, + "step": 1328 + }, + { + "epoch": 0.37, + "logps_train/chosen": -102.6099624633789, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -113.65704345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2609962224960327, + "rewards_train/margins": 1.454708218574524, + "rewards_train/rejected": -2.7157044410705566, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.35749053955078, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -30.00638198852539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.088874101638794, + "rewards_train/margins": 0.9242641925811768, + "rewards_train/rejected": -2.0131382942199707, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -28.821510314941406, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -42.80418014526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0696511268615723, + "rewards_train/margins": 0.7170169353485107, + "rewards_train/rejected": -2.786668062210083, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -34.580223083496094, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -34.742637634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3330223560333252, + "rewards_train/margins": 0.01624143123626709, + "rewards_train/rejected": -1.3492637872695923, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -132.2845001220703, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -157.70367431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1284501552581787, + "rewards_train/margins": 2.1419174671173096, + "rewards_train/rejected": -4.270367622375488, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -204.6547393798828, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -235.51263427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.26547384262085, + "rewards_train/margins": 2.285789966583252, + "rewards_train/rejected": -9.551263809204102, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -121.12286376953125, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -129.1174774169922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.912286400794983, + "rewards_train/margins": -0.5505386590957642, + "rewards_train/rejected": -1.3617477416992188, + "step": 1329 + }, + { + "epoch": 0.37, + "logps_train/chosen": -26.1566104888916, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -44.28765106201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2281610518693924, + "rewards_train/margins": 1.5131041258573532, + "rewards_train/rejected": -1.7412651777267456, + "step": 1329 + }, + { + "epoch": 0.37, + "learning_rate": 9.76853489883586e-07, + "loss": 0.4098, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -61.48869323730469, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -48.7363395690918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0738693475723267, + "rewards_train/margins": 2.212264657020569, + "rewards_train/rejected": -3.2861340045928955, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -102.19496154785156, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -118.80451965332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9694961905479431, + "rewards_train/margins": 0.8609557747840881, + "rewards_train/rejected": -1.8304519653320312, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -7.105300426483154, + "logps_train/ref_chosen": -0.921875, + "logps_train/ref_rejected": -2.125, + "logps_train/rejected": -18.623384475708008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6183425784111023, + "rewards_train/margins": 1.0314958691596985, + "rewards_train/rejected": -1.6498384475708008, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -109.42864990234375, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -255.16636657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.592864990234375, + "rewards_train/margins": 8.423771858215332, + "rewards_train/rejected": -11.016636848449707, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -41.26787567138672, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -45.80028533935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22678756713867188, + "rewards_train/margins": 0.15324097871780396, + "rewards_train/rejected": -0.38002854585647583, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -99.77998352050781, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -195.11050415039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8779983520507812, + "rewards_train/margins": 5.933052062988281, + "rewards_train/rejected": -7.8110504150390625, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -124.90538024902344, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -124.9422836303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.190538167953491, + "rewards_train/margins": 0.0036902427673339844, + "rewards_train/rejected": -2.194228410720825, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -52.806854248046875, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -124.42138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5056854486465454, + "rewards_train/margins": 1.0864533185958862, + "rewards_train/rejected": -2.5921387672424316, + "step": 1330 + }, + { + "epoch": 0.37, + "logps_train/chosen": -48.64377975463867, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -54.897220611572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4018781185150146, + "rewards_train/margins": 1.087843894958496, + "rewards_train/rejected": -3.4897220134735107, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -33.17692184448242, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -30.984481811523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1676921844482422, + "rewards_train/margins": 0.9370059967041016, + "rewards_train/rejected": -2.1046981811523438, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -119.8720474243164, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -99.65533447265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.087204694747925, + "rewards_train/margins": -0.47167110443115234, + "rewards_train/rejected": -2.6155335903167725, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -21.638172149658203, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -0.89453125, + "logps_train/rejected": -22.610456466674805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5138172507286072, + "rewards_train/margins": 1.6577752232551575, + "rewards_train/rejected": -2.1715924739837646, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -88.06166076660156, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -93.84369659423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.656166076660156, + "rewards_train/margins": 0.8032035827636719, + "rewards_train/rejected": -5.459369659423828, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -96.08609008789062, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -121.5733413696289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8086090087890625, + "rewards_train/margins": 1.5487251281738281, + "rewards_train/rejected": -2.3573341369628906, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -30.264263153076172, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -47.19633483886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6514263153076172, + "rewards_train/margins": 0.8432071208953857, + "rewards_train/rejected": -2.494633436203003, + "step": 1331 + }, + { + "epoch": 0.37, + "logps_train/chosen": -8.566864967346191, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -64.3355484008789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5870634913444519, + "rewards_train/margins": 4.658118426799774, + "rewards_train/rejected": -4.071054935455322, + "step": 1331 + }, + { + "epoch": 0.37, + "learning_rate": 9.742087310475689e-07, + "loss": 0.3154, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.448341369628906, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -21.38041114807129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2635841369628906, + "rewards_train/margins": 0.8307069540023804, + "rewards_train/rejected": -1.094291090965271, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -32.399166107177734, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -37.25811004638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0399166345596313, + "rewards_train/margins": 1.8296443223953247, + "rewards_train/rejected": -2.869560956954956, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -49.06071472167969, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -45.32750701904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1560714691877365, + "rewards_train/margins": 2.426679328083992, + "rewards_train/rejected": -2.5827507972717285, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -179.76795959472656, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -164.70932006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7767961025238037, + "rewards_train/margins": 0.34413599967956543, + "rewards_train/rejected": -4.120932102203369, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -97.88941955566406, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -72.58587646484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4389419555664062, + "rewards_train/margins": -0.15535426139831543, + "rewards_train/rejected": -1.2835876941680908, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -157.91831970214844, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -140.96517944335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2418320178985596, + "rewards_train/margins": -0.2453141212463379, + "rewards_train/rejected": -2.9965178966522217, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -35.7491340637207, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -26.964990615844727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8124133944511414, + "rewards_train/margins": 0.3715856671333313, + "rewards_train/rejected": -1.1839990615844727, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -53.76506042480469, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -78.62664794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4515061378479004, + "rewards_train/margins": 1.3111586570739746, + "rewards_train/rejected": -3.762664794921875, + "step": 1332 + }, + { + "epoch": 0.37, + "logps_train/chosen": -10.84920883178711, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -11.904350280761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.878670871257782, + "rewards_train/margins": 0.002389192581176758, + "rewards_train/rejected": -0.8810600638389587, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.24884033203125, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -16.240909576416016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05613403394818306, + "rewards_train/margins": -0.0007930770516395569, + "rewards_train/rejected": -0.0553409568965435, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -115.49980163574219, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -177.3380584716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7499801516532898, + "rewards_train/margins": 0.28382569551467896, + "rewards_train/rejected": -1.0338058471679688, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -30.175884246826172, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -121.9968032836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.555088460445404, + "rewards_train/margins": 2.744592010974884, + "rewards_train/rejected": -3.299680471420288, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -0.030570562928915024, + "logps_train/ref_chosen": -0.0908203125, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -11.06856632232666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.00602497486397624, + "rewards_train/margins": 0.6753816190175712, + "rewards_train/rejected": -0.669356644153595, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -151.05267333984375, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -197.84033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.155267238616943, + "rewards_train/margins": 3.578765869140625, + "rewards_train/rejected": -7.734033107757568, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -52.02861785888672, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -156.19598388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3653619289398193, + "rewards_train/margins": 2.4542367458343506, + "rewards_train/rejected": -4.81959867477417, + "step": 1333 + }, + { + "epoch": 0.37, + "logps_train/chosen": -32.5202522277832, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -26.785585403442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6895253658294678, + "rewards_train/margins": -0.3406543731689453, + "rewards_train/rejected": -2.3488709926605225, + "step": 1333 + }, + { + "epoch": 0.37, + "learning_rate": 9.715641527230889e-07, + "loss": 0.4312, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -2.7436585426330566, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -8.346603393554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.024365855380892754, + "rewards_train/margins": 0.48216949589550495, + "rewards_train/rejected": -0.5065353512763977, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -109.95716857910156, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -136.3109130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4957168698310852, + "rewards_train/margins": 3.985374629497528, + "rewards_train/rejected": -4.481091499328613, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -3.4805526733398438, + "logps_train/ref_chosen": -0.365234375, + "logps_train/ref_rejected": -0.365234375, + "logps_train/rejected": -3.360011577606201, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.31153184175491333, + "rewards_train/margins": -0.012054115533828735, + "rewards_train/rejected": -0.2994777262210846, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -165.45098876953125, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -191.60897827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.745098888874054, + "rewards_train/margins": 2.1157988905906677, + "rewards_train/rejected": -2.8608977794647217, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -108.76041412353516, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -181.7741241455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4260414242744446, + "rewards_train/margins": 4.751371085643768, + "rewards_train/rejected": -5.177412509918213, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -114.34439849853516, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -309.5068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0344398021698, + "rewards_train/margins": 14.21624493598938, + "rewards_train/rejected": -16.25068473815918, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -104.32874298095703, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -121.2098159790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6328743696212769, + "rewards_train/margins": 1.5881072282791138, + "rewards_train/rejected": -3.2209815979003906, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -27.01751708984375, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -32.05911636352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.433001756668091, + "rewards_train/margins": 0.17603492736816406, + "rewards_train/rejected": -2.609036684036255, + "step": 1334 + }, + { + "epoch": 0.37, + "logps_train/chosen": -92.18592834472656, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -42.5, + "logps_train/rejected": -72.1961669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5185928344726562, + "rewards_train/margins": 0.45102381706237793, + "rewards_train/rejected": -2.969616651535034, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -64.05543518066406, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -76.720947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3055435121059418, + "rewards_train/margins": 1.8415512144565582, + "rewards_train/rejected": -2.1470947265625, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -222.50686645507812, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -233.6322021484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.650687217712402, + "rewards_train/margins": -1.8874669075012207, + "rewards_train/rejected": -6.763220310211182, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -182.51316833496094, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -164.52853393554688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.001317024230957, + "rewards_train/margins": -1.1984634399414062, + "rewards_train/rejected": -7.802853584289551, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -11.602947235107422, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -17.18704605102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5415447354316711, + "rewards_train/margins": 0.6240349411964417, + "rewards_train/rejected": -1.1655796766281128, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -128.20265197753906, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -237.7179412841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4702652096748352, + "rewards_train/margins": 6.5015289187431335, + "rewards_train/rejected": -6.971794128417969, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -8.838213920593262, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -63.520469665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3556964099407196, + "rewards_train/margins": 1.7213506996631622, + "rewards_train/rejected": -2.077047109603882, + "step": 1335 + }, + { + "epoch": 0.37, + "logps_train/chosen": -36.579750061035156, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -2.09375, + "logps_train/rejected": -34.50687026977539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2985999584198, + "rewards_train/margins": -0.05728793144226074, + "rewards_train/rejected": -3.241312026977539, + "step": 1335 + }, + { + "epoch": 0.37, + "learning_rate": 9.689197734193912e-07, + "loss": 0.4728, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -48.41016387939453, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -43.36318588256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.441016435623169, + "rewards_train/margins": 0.920302152633667, + "rewards_train/rejected": -3.361318588256836, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -68.36920928955078, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -152.23089599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3869209289550781, + "rewards_train/margins": 4.8361687660217285, + "rewards_train/rejected": -5.223089694976807, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.180502891540527, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -23.948959350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6899253129959106, + "rewards_train/margins": 0.5424706935882568, + "rewards_train/rejected": -1.2323960065841675, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -76.09732818603516, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -118.64591217041016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1597328186035156, + "rewards_train/margins": 2.654858350753784, + "rewards_train/rejected": -3.8145911693573, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -88.84892272949219, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -185.0941619873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4848922789096832, + "rewards_train/margins": 5.824524015188217, + "rewards_train/rejected": -6.3094162940979, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -30.35249900817871, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -103.04474639892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.691499948501587, + "rewards_train/margins": 3.462974786758423, + "rewards_train/rejected": -5.15447473526001, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -129.91583251953125, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -136.81253051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.041583299636841, + "rewards_train/margins": 4.51466965675354, + "rewards_train/rejected": -7.556252956390381, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -18.90100860595703, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -15.098918914794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19010086357593536, + "rewards_train/margins": 0.18229104578495026, + "rewards_train/rejected": -0.3723919093608856, + "step": 1336 + }, + { + "epoch": 0.37, + "logps_train/chosen": -82.0147933959961, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -97.70936584472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0985206589102745, + "rewards_train/margins": 1.3194573149085045, + "rewards_train/rejected": -1.22093665599823, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -152.46934509277344, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -152.6210174560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.546934604644775, + "rewards_train/margins": -0.13483285903930664, + "rewards_train/rejected": -4.412101745605469, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -89.28430938720703, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -146.291259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.903430938720703, + "rewards_train/margins": 4.62569522857666, + "rewards_train/rejected": -7.529126167297363, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -174.37271118164062, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -233.4517059326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7372711300849915, + "rewards_train/margins": 6.207899749279022, + "rewards_train/rejected": -6.945170879364014, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -177.05130004882812, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -219.92172241210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9051300287246704, + "rewards_train/margins": 6.187042593955994, + "rewards_train/rejected": -8.092172622680664, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -23.9121036529541, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -49.44960403442383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2662104368209839, + "rewards_train/margins": -0.7212499976158142, + "rewards_train/rejected": -0.5449604392051697, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -110.44075012207031, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -127.04236602783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3440749645233154, + "rewards_train/margins": 0.010161638259887695, + "rewards_train/rejected": -2.354236602783203, + "step": 1337 + }, + { + "epoch": 0.37, + "logps_train/chosen": -0.6748558282852173, + "logps_train/ref_chosen": -0.70703125, + "logps_train/ref_rejected": -0.70703125, + "logps_train/rejected": -0.7268038988113403, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0032175423111766577, + "rewards_train/margins": 0.005194807192310691, + "rewards_train/rejected": -0.001977264881134033, + "step": 1337 + }, + { + "epoch": 0.37, + "learning_rate": 9.662756116443273e-07, + "loss": 0.3144, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -12.300975799560547, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -63.22180938720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7957226037979126, + "rewards_train/margins": 3.226458430290222, + "rewards_train/rejected": -4.022181034088135, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -1.688843846321106, + "logps_train/ref_chosen": -1.5, + "logps_train/ref_rejected": -0.53125, + "logps_train/rejected": -0.23838336765766144, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.018884385004639626, + "rewards_train/margins": -0.04817105084657669, + "rewards_train/rejected": 0.029286665841937065, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -38.49923324584961, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -51.84867858886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8499233722686768, + "rewards_train/margins": 0.7974445819854736, + "rewards_train/rejected": -2.6473679542541504, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -151.403564453125, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -191.76544189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.190356731414795, + "rewards_train/margins": 0.5861873626708984, + "rewards_train/rejected": -4.776544094085693, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -2.2332401275634766, + "logps_train/ref_chosen": -0.53515625, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -0.48247870802879333, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16980838775634766, + "rewards_train/margins": -0.47312304377555847, + "rewards_train/rejected": 0.3033146560192108, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -13.816312789916992, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -1.3515625, + "logps_train/rejected": -7.6570868492126465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1706937551498413, + "rewards_train/margins": -0.5401412844657898, + "rewards_train/rejected": -0.6305524706840515, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -59.32683563232422, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -125.05426788330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8076835870742798, + "rewards_train/margins": 2.7977432012557983, + "rewards_train/rejected": -3.605426788330078, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -8.486648559570312, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -41.025047302246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6314773559570312, + "rewards_train/margins": 1.6460273265838623, + "rewards_train/rejected": -2.2775046825408936, + "step": 1338 + }, + { + "epoch": 0.37, + "logps_train/chosen": -62.153865814208984, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -1.5234375, + "logps_train/rejected": -17.814510345458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4653866291046143, + "rewards_train/margins": 0.1637207269668579, + "rewards_train/rejected": -1.6291073560714722, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -42.35103988647461, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -11.604730606079102, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.485103964805603, + "rewards_train/margins": -0.713693380355835, + "rewards_train/rejected": -0.7714105844497681, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -16.268247604370117, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -23.73000717163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7080747485160828, + "rewards_train/margins": 1.077426016330719, + "rewards_train/rejected": -1.7855007648468018, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -92.71188354492188, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -111.86483764648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6711883544921875, + "rewards_train/margins": 1.9652955532073975, + "rewards_train/rejected": -2.636483907699585, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -82.05345916748047, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -118.61774444580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7053459882736206, + "rewards_train/margins": 2.1564284563064575, + "rewards_train/rejected": -3.861774444580078, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -172.1302490234375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -168.061767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.91302490234375, + "rewards_train/margins": 1.993152141571045, + "rewards_train/rejected": -4.906177043914795, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.023599624633789, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -16.157211303710938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9586099982261658, + "rewards_train/margins": -0.05851387977600098, + "rewards_train/rejected": -0.9000961184501648, + "step": 1339 + }, + { + "epoch": 0.37, + "logps_train/chosen": -15.054247856140137, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -1.4140625, + "logps_train/rejected": -6.055169105529785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3366747796535492, + "rewards_train/margins": 0.12743589282035828, + "rewards_train/rejected": -0.46411067247390747, + "step": 1339 + }, + { + "epoch": 0.37, + "learning_rate": 9.636316859042257e-07, + "loss": 0.4691, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -14.049427032470703, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -19.66057777404785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16119270026683807, + "rewards_train/margins": 1.473615124821663, + "rewards_train/rejected": -1.634807825088501, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -84.22882080078125, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -112.38185119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.547882080078125, + "rewards_train/margins": 0.6403031349182129, + "rewards_train/rejected": -4.188185214996338, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -232.46279907226562, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -189.1513214111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.446280002593994, + "rewards_train/margins": 0.9688520431518555, + "rewards_train/rejected": -6.41513204574585, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -22.684932708740234, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -30.836578369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4434932768344879, + "rewards_train/margins": 2.3495396077632904, + "rewards_train/rejected": -2.7930328845977783, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -52.13653564453125, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -99.56218719482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06365356594324112, + "rewards_train/margins": 2.1425651535391808, + "rewards_train/rejected": -2.206218719482422, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -189.32131958007812, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -254.79000854492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.532132148742676, + "rewards_train/margins": 6.546869277954102, + "rewards_train/rejected": -12.079001426696777, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -115.40412902832031, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -222.36691284179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2904129028320312, + "rewards_train/margins": 6.796278953552246, + "rewards_train/rejected": -10.086691856384277, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -164.08377075195312, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -215.51979064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.908377170562744, + "rewards_train/margins": 3.9936022758483887, + "rewards_train/rejected": -8.901979446411133, + "step": 1340 + }, + { + "epoch": 0.37, + "logps_train/chosen": -89.68502044677734, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -178.82907104492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.79350209236145, + "rewards_train/margins": 4.739404916763306, + "rewards_train/rejected": -7.532907009124756, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -28.456636428833008, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -30.55445671081543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2706637382507324, + "rewards_train/margins": 0.4816570281982422, + "rewards_train/rejected": -2.7523207664489746, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -11.937177658081055, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -26.580150604248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9765302538871765, + "rewards_train/margins": 0.46898478269577026, + "rewards_train/rejected": -1.4455150365829468, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -163.28738403320312, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -283.92706298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1287384033203125, + "rewards_train/margins": 10.06396770477295, + "rewards_train/rejected": -13.192706108093262, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -136.5614471435547, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -205.4034423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2561447620391846, + "rewards_train/margins": 5.784199476242065, + "rewards_train/rejected": -7.04034423828125, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -135.30154418945312, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -248.029052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.530154705047607, + "rewards_train/margins": 4.872750759124756, + "rewards_train/rejected": -9.402905464172363, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -241.74032592773438, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -243.74374389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.874032974243164, + "rewards_train/margins": 1.1503419876098633, + "rewards_train/rejected": -12.024374961853027, + "step": 1341 + }, + { + "epoch": 0.37, + "logps_train/chosen": -213.3402099609375, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -150.94534301757812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.93402099609375, + "rewards_train/margins": -2.0394866466522217, + "rewards_train/rejected": -3.8945343494415283, + "step": 1341 + }, + { + "epoch": 0.38, + "learning_rate": 9.609880147037634e-07, + "loss": 0.2873, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -72.04530334472656, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -73.21035766601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4045303463935852, + "rewards_train/margins": 0.1165054440498352, + "rewards_train/rejected": -0.5210357904434204, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -282.38421630859375, + "logps_train/ref_chosen": -192.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -279.5426025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.038421630859375, + "rewards_train/margins": 3.7158384323120117, + "rewards_train/rejected": -12.754260063171387, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -124.77760314941406, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -208.61019897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07776031643152237, + "rewards_train/margins": 7.983259581029415, + "rewards_train/rejected": -8.061019897460938, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -33.27677917480469, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -1.328125, + "logps_train/rejected": -18.54389762878418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3151779174804688, + "rewards_train/margins": 0.40639936923980713, + "rewards_train/rejected": -1.7215772867202759, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -10.152904510498047, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -47.275630950927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20279045403003693, + "rewards_train/margins": 3.424772784113884, + "rewards_train/rejected": -3.627563238143921, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -105.47856903076172, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -105.85189056396484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8978568911552429, + "rewards_train/margins": 0.03733217716217041, + "rewards_train/rejected": -0.9351890683174133, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -23.88720703125, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -34.14279556274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7012207508087158, + "rewards_train/margins": 0.7380588054656982, + "rewards_train/rejected": -2.439279556274414, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.583438873291016, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -26.262908935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3427189588546753, + "rewards_train/margins": 0.9023219347000122, + "rewards_train/rejected": -2.2450408935546875, + "step": 1342 + }, + { + "epoch": 0.38, + "logps_train/chosen": -199.68896484375, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -253.79754638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.668896436691284, + "rewards_train/margins": 3.910858392715454, + "rewards_train/rejected": -6.579754829406738, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -83.42655181884766, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -62.05833053588867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8676552772521973, + "rewards_train/margins": -0.13682222366333008, + "rewards_train/rejected": -2.730833053588867, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -188.3726806640625, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -210.294189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.737268090248108, + "rewards_train/margins": 1.3921509981155396, + "rewards_train/rejected": -3.1294190883636475, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -107.7630386352539, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -172.75918579101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1763038635253906, + "rewards_train/margins": 7.549614906311035, + "rewards_train/rejected": -9.725918769836426, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.031869888305664, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -26.25564956665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7031869888305664, + "rewards_train/margins": 1.4223780632019043, + "rewards_train/rejected": -2.1255650520324707, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -3.9046154022216797, + "logps_train/ref_chosen": -1.1015625, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -27.597734451293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28030529618263245, + "rewards_train/margins": 0.7794681489467621, + "rewards_train/rejected": -1.0597734451293945, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -45.99650192260742, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -84.85926818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0003498077567201108, + "rewards_train/margins": 0.9862766146834474, + "rewards_train/rejected": -0.9859268069267273, + "step": 1343 + }, + { + "epoch": 0.38, + "logps_train/chosen": -184.61798095703125, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -97.24407958984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6617982387542725, + "rewards_train/margins": -0.4373903274536133, + "rewards_train/rejected": -3.224407911300659, + "step": 1343 + }, + { + "epoch": 0.38, + "learning_rate": 9.583446165458363e-07, + "loss": 0.3414, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -95.69589233398438, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -214.18153381347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4695892333984375, + "rewards_train/margins": 9.448564529418945, + "rewards_train/rejected": -9.918153762817383, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -125.87376403808594, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -125.74214935302734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8873764276504517, + "rewards_train/margins": -0.01316148042678833, + "rewards_train/rejected": -0.8742149472236633, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -177.30287170410156, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -187.85740661621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9302871227264404, + "rewards_train/margins": 1.8554537296295166, + "rewards_train/rejected": -4.785740852355957, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -91.92219543457031, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -183.41748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6422195434570312, + "rewards_train/margins": 3.949528694152832, + "rewards_train/rejected": -6.591748237609863, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -0.8266907334327698, + "logps_train/ref_chosen": -0.75390625, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -13.883578300476074, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007278448436409235, + "rewards_train/margins": 0.29357939353212714, + "rewards_train/rejected": -0.3008578419685364, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -12.345234870910645, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -30.289470672607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7345234751701355, + "rewards_train/margins": 0.5069236159324646, + "rewards_train/rejected": -1.2414470911026, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -61.45269012451172, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -75.01893615722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004730987828224897, + "rewards_train/margins": 0.3066246095113456, + "rewards_train/rejected": -0.3018936216831207, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -35.073081970214844, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -64.95988464355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8698081970214844, + "rewards_train/margins": 1.7261803150177002, + "rewards_train/rejected": -2.5959885120391846, + "step": 1344 + }, + { + "epoch": 0.38, + "logps_train/chosen": -100.79444885253906, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -137.62811279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0794448852539062, + "rewards_train/margins": 1.133366346359253, + "rewards_train/rejected": -3.212811231613159, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -150.30599975585938, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -142.4847412109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.580600261688232, + "rewards_train/margins": -0.3321261405944824, + "rewards_train/rejected": -5.24847412109375, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -273.0870361328125, + "logps_train/ref_chosen": -226.0, + "logps_train/ref_rejected": -212.0, + "logps_train/rejected": -252.38665771484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.708703517913818, + "rewards_train/margins": -0.6700377464294434, + "rewards_train/rejected": -4.038665771484375, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -9.520588874816895, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -7.813174724578857, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4489338994026184, + "rewards_train/margins": -0.3676164224743843, + "rewards_train/rejected": -0.0813174769282341, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -77.47654724121094, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -101.46869659423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20234528183937073, + "rewards_train/margins": 2.349214941263199, + "rewards_train/rejected": -2.146869659423828, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -25.26993751525879, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -41.033905029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45199376344680786, + "rewards_train/margins": 1.5513967871665955, + "rewards_train/rejected": -2.0033905506134033, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -124.00669860839844, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -150.9533233642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05066986009478569, + "rewards_train/margins": -0.05533752357587218, + "rewards_train/rejected": 0.0046676634810864925, + "step": 1345 + }, + { + "epoch": 0.38, + "logps_train/chosen": -113.40330505371094, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -166.7204132080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2903304994106293, + "rewards_train/margins": 5.931711107492447, + "rewards_train/rejected": -6.222041606903076, + "step": 1345 + }, + { + "epoch": 0.38, + "learning_rate": 9.557015099314286e-07, + "loss": 0.4215, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -126.96819305419922, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -124.22746276855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7968193292617798, + "rewards_train/margins": 0.025926947593688965, + "rewards_train/rejected": -1.8227462768554688, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -112.64557647705078, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -243.20802307128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.914557695388794, + "rewards_train/margins": 2.9062445163726807, + "rewards_train/rejected": -4.820802211761475, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -44.11829376220703, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -48.85542678833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8493293523788452, + "rewards_train/margins": 0.6987134218215942, + "rewards_train/rejected": -2.5480427742004395, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -109.84097290039062, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -127.06075286865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1340973377227783, + "rewards_train/margins": 0.5219779014587402, + "rewards_train/rejected": -2.6560752391815186, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -59.72040557861328, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -59.17976379394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.415790557861328, + "rewards_train/margins": 0.5584359169006348, + "rewards_train/rejected": -4.974226474761963, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -65.4410171508789, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -38.203643798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14410172402858734, + "rewards_train/margins": 0.4512626677751541, + "rewards_train/rejected": -0.5953643918037415, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -159.19351196289062, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -219.8718719482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.969351291656494, + "rewards_train/margins": 0.5178360939025879, + "rewards_train/rejected": -7.487187385559082, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -136.30953979492188, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -130.87008666992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.530954122543335, + "rewards_train/margins": -1.1939454078674316, + "rewards_train/rejected": -1.3370087146759033, + "step": 1346 + }, + { + "epoch": 0.38, + "logps_train/chosen": -46.73121643066406, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -42.52793884277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.048121690750122, + "rewards_train/margins": 1.3046722412109375, + "rewards_train/rejected": -3.3527939319610596, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -206.40023803710938, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -159.58071899414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5400238037109375, + "rewards_train/margins": 0.968048095703125, + "rewards_train/rejected": -5.5080718994140625, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -143.61343383789062, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -211.21693420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1613433361053467, + "rewards_train/margins": 2.1603500843048096, + "rewards_train/rejected": -4.321693420410156, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -35.13134765625, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -17.552488327026367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.000634789466858, + "rewards_train/margins": 0.12961411476135254, + "rewards_train/rejected": -1.1302489042282104, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -2.9726638793945312, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -25.923633575439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0035163878928869963, + "rewards_train/margins": 2.163847017334774, + "rewards_train/rejected": -2.167363405227661, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -151.82542419433594, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -196.2935791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7825424671173096, + "rewards_train/margins": 4.046815633773804, + "rewards_train/rejected": -5.829358100891113, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -48.92350769042969, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -122.93539428710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7423507571220398, + "rewards_train/margins": 4.001188576221466, + "rewards_train/rejected": -4.743539333343506, + "step": 1347 + }, + { + "epoch": 0.38, + "logps_train/chosen": -39.572021484375, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -39.55447006225586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9447021484375, + "rewards_train/margins": -0.6267549991607666, + "rewards_train/rejected": -2.3179471492767334, + "step": 1347 + }, + { + "epoch": 0.38, + "learning_rate": 9.530587133594848e-07, + "loss": 0.4359, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -7.606287002563477, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -30.95844268798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2893713116645813, + "rewards_train/margins": 0.8852155804634094, + "rewards_train/rejected": -0.5958442687988281, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -82.09770202636719, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -107.12738800048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3902297914028168, + "rewards_train/margins": 0.3029685914516449, + "rewards_train/rejected": 0.08726119995117188, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -71.17977905273438, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -148.65194702148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8929779529571533, + "rewards_train/margins": 0.17221665382385254, + "rewards_train/rejected": -4.065194606781006, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -114.4382553100586, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -205.62408447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.093825578689575, + "rewards_train/margins": 3.1685831546783447, + "rewards_train/rejected": -5.26240873336792, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -10.790504455566406, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -16.001211166381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7665504813194275, + "rewards_train/margins": 0.26794570684432983, + "rewards_train/rejected": -1.0344961881637573, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -88.23068237304688, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -67.56486511230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5730682611465454, + "rewards_train/margins": 1.7834182977676392, + "rewards_train/rejected": -2.3564865589141846, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -103.43192291259766, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -127.20353698730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.40680772066116333, + "rewards_train/margins": 0.32716141641139984, + "rewards_train/rejected": 0.07964630424976349, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -41.80730056762695, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -27.17525863647461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.786980152130127, + "rewards_train/margins": -0.453829288482666, + "rewards_train/rejected": -2.333150863647461, + "step": 1348 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.76004981994629, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -46.218544006347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0822550058364868, + "rewards_train/margins": 2.002099394798279, + "rewards_train/rejected": -3.0843544006347656, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -109.12987518310547, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -238.33187866210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6129875183105469, + "rewards_train/margins": 7.920200347900391, + "rewards_train/rejected": -8.533187866210938, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -8.900906562805176, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -0.447265625, + "logps_train/rejected": -8.435133934020996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5510281920433044, + "rewards_train/margins": 0.2477586269378662, + "rewards_train/rejected": -0.7987868189811707, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -25.35967445373535, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -33.042137145996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9547174572944641, + "rewards_train/margins": -0.18800371885299683, + "rewards_train/rejected": -0.7667137384414673, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -118.24748229980469, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -184.31185913085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0747482776641846, + "rewards_train/margins": 7.10643744468689, + "rewards_train/rejected": -8.181185722351074, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -7.571957111358643, + "logps_train/ref_chosen": -0.66015625, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -11.882033348083496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6911801099777222, + "rewards_train/margins": 0.01889824867248535, + "rewards_train/rejected": -0.7100783586502075, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -112.8560562133789, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -164.03399658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7856056690216064, + "rewards_train/margins": 0.7177939414978027, + "rewards_train/rejected": -3.503399610519409, + "step": 1349 + }, + { + "epoch": 0.38, + "logps_train/chosen": -143.7113800048828, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -184.23480224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.271138072013855, + "rewards_train/margins": 4.052342057228088, + "rewards_train/rejected": -5.323480129241943, + "step": 1349 + }, + { + "epoch": 0.38, + "learning_rate": 9.504162453267776e-07, + "loss": 0.3979, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -177.55801391601562, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -221.1752166748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.855801582336426, + "rewards_train/margins": 4.9617204666137695, + "rewards_train/rejected": -9.817522048950195, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -28.122406005859375, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -56.49137878417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9434906244277954, + "rewards_train/margins": 2.3056472539901733, + "rewards_train/rejected": -4.249137878417969, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -110.3934326171875, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -165.88238525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1893433332443237, + "rewards_train/margins": 0.6988952159881592, + "rewards_train/rejected": -1.888238549232483, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -256.0242919921875, + "logps_train/ref_chosen": -218.0, + "logps_train/ref_rejected": -286.0, + "logps_train/rejected": -339.8289794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.80242919921875, + "rewards_train/margins": 1.5804686546325684, + "rewards_train/rejected": -5.382897853851318, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -97.32894134521484, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -104.7403564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6328941583633423, + "rewards_train/margins": 0.24114155769348145, + "rewards_train/rejected": -1.8740357160568237, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -10.475287437438965, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -23.479385375976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5912787318229675, + "rewards_train/margins": 1.1785348057746887, + "rewards_train/rejected": -1.7698135375976562, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -60.384300231933594, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -80.14613342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7384300231933594, + "rewards_train/margins": 1.1011834144592285, + "rewards_train/rejected": -2.839613437652588, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -8.568769454956055, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -25.532285690307617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03687305375933647, + "rewards_train/margins": 0.7401016466319561, + "rewards_train/rejected": -0.7032285928726196, + "step": 1350 + }, + { + "epoch": 0.38, + "logps_train/chosen": -115.52671813964844, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -224.62704467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7026718854904175, + "rewards_train/margins": 9.810032963752747, + "rewards_train/rejected": -11.512704849243164, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -57.78375244140625, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -114.82793426513672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.478375196456909, + "rewards_train/margins": -0.24558162689208984, + "rewards_train/rejected": -2.2327935695648193, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -52.556190490722656, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -51.73811340332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6306190490722656, + "rewards_train/margins": 0.0994422435760498, + "rewards_train/rejected": -3.7300612926483154, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -123.4691390991211, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -156.45242309570312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1469138860702515, + "rewards_train/margins": -1.201671577990055, + "rewards_train/rejected": 0.05475769191980362, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -7.666385650634766, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -6.448101043701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19476357102394104, + "rewards_train/margins": -0.053078457713127136, + "rewards_train/rejected": -0.1416851133108139, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -92.19645690917969, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -186.4239501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4196456670761108, + "rewards_train/margins": 6.722749352455139, + "rewards_train/rejected": -8.14239501953125, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -96.47299194335938, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -216.0486602783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9972991943359375, + "rewards_train/margins": 5.70756721496582, + "rewards_train/rejected": -8.704866409301758, + "step": 1351 + }, + { + "epoch": 0.38, + "logps_train/chosen": -6.29165506362915, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -26.143640518188477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3822905123233795, + "rewards_train/margins": 0.869573563337326, + "rewards_train/rejected": -1.2518640756607056, + "step": 1351 + }, + { + "epoch": 0.38, + "learning_rate": 9.477741243277827e-07, + "loss": 0.389, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -6.842741012573242, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -13.708331108093262, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23739910125732422, + "rewards_train/margins": 0.527184009552002, + "rewards_train/rejected": -0.7645831108093262, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -81.00360107421875, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -80.87210083007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.35036012530326843, + "rewards_train/margins": -0.013150036334991455, + "rewards_train/rejected": -0.337210088968277, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -143.60833740234375, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -174.18772888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.810833692550659, + "rewards_train/margins": 1.8079392910003662, + "rewards_train/rejected": -5.618772983551025, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -98.96403503417969, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -163.75494384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.546403646469116, + "rewards_train/margins": 4.629091024398804, + "rewards_train/rejected": -7.17549467086792, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -18.798385620117188, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -81.77589416503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3860886096954346, + "rewards_train/margins": 2.4415009021759033, + "rewards_train/rejected": -3.827589511871338, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -55.59377670288086, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -72.4229507446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.459377646446228, + "rewards_train/margins": 0.9829174280166626, + "rewards_train/rejected": -2.4422950744628906, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -15.963146209716797, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -23.513643264770508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0056896209716797, + "rewards_train/margins": 0.3456747531890869, + "rewards_train/rejected": -1.3513643741607666, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -85.34098052978516, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -77.44103240966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6840981245040894, + "rewards_train/margins": -0.38999485969543457, + "rewards_train/rejected": -1.2941032648086548, + "step": 1352 + }, + { + "epoch": 0.38, + "logps_train/chosen": -39.06175994873047, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -39.11388397216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2186760902404785, + "rewards_train/margins": 1.0552122592926025, + "rewards_train/rejected": -3.273888349533081, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -9.039950370788574, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -28.74587631225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47899505496025085, + "rewards_train/margins": 1.5955925285816193, + "rewards_train/rejected": -2.07458758354187, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -7.070405960083008, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -19.116901397705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2726655900478363, + "rewards_train/margins": 1.3608995974063873, + "rewards_train/rejected": -1.6335651874542236, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -215.9535369873047, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -264.5797424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.595353603363037, + "rewards_train/margins": 0.962620735168457, + "rewards_train/rejected": -7.557974338531494, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.835973739624023, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -30.35248565673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5960974097251892, + "rewards_train/margins": 0.5016512274742126, + "rewards_train/rejected": -1.0977486371994019, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -14.606425285339355, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -17.398893356323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26064252853393555, + "rewards_train/margins": 1.0792468786239624, + "rewards_train/rejected": -1.339889407157898, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -100.32733917236328, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -80.63116455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48273393511772156, + "rewards_train/margins": 4.780382424592972, + "rewards_train/rejected": -5.263116359710693, + "step": 1353 + }, + { + "epoch": 0.38, + "logps_train/chosen": -28.43410301208496, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -76.71481323242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0934103727340698, + "rewards_train/margins": 0.9530709981918335, + "rewards_train/rejected": -2.0464813709259033, + "step": 1353 + }, + { + "epoch": 0.38, + "learning_rate": 9.451323688545458e-07, + "loss": 0.3315, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -114.44670104980469, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -172.78750610351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4946701526641846, + "rewards_train/margins": 3.534080743789673, + "rewards_train/rejected": -6.028750896453857, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -148.43670654296875, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -244.865478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.043670654296875, + "rewards_train/margins": 6.842877388000488, + "rewards_train/rejected": -6.886548042297363, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -105.1375732421875, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -169.27810668945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.213757276535034, + "rewards_train/margins": 2.2640535831451416, + "rewards_train/rejected": -5.477810859680176, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -95.69265747070312, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -82.05874633789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11926575005054474, + "rewards_train/margins": 0.03660888969898224, + "rewards_train/rejected": -0.15587463974952698, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -77.86125183105469, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -31.961963653564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.161125183105469, + "rewards_train/margins": -1.7305538654327393, + "rewards_train/rejected": -2.4305713176727295, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -143.61708068847656, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -235.2913818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.911708354949951, + "rewards_train/margins": 4.7174296379089355, + "rewards_train/rejected": -9.629137992858887, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -81.01470184326172, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -182.90255737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2514702081680298, + "rewards_train/margins": 7.888785719871521, + "rewards_train/rejected": -9.14025592803955, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -126.82192993164062, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -70.95609283447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4321930408477783, + "rewards_train/margins": 0.713416337966919, + "rewards_train/rejected": -2.1456093788146973, + "step": 1354 + }, + { + "epoch": 0.38, + "logps_train/chosen": -69.85310363769531, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -177.1222381591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4353103637695312, + "rewards_train/margins": 6.076913833618164, + "rewards_train/rejected": -8.512224197387695, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.96539306640625, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -15.630279541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17778931558132172, + "rewards_train/margins": 1.1008637100458145, + "rewards_train/rejected": -1.2786530256271362, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -80.29495239257812, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -125.6383056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9544953107833862, + "rewards_train/margins": 2.9093352556228638, + "rewards_train/rejected": -4.86383056640625, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.116193771362305, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -30.65140151977539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0991194248199463, + "rewards_train/margins": 0.3535207509994507, + "rewards_train/rejected": -1.452640175819397, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -13.993785858154297, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -15.822118759155273, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9243785738945007, + "rewards_train/margins": -0.20466667413711548, + "rewards_train/rejected": -0.7197118997573853, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -19.12834930419922, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -31.561511993408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3972100019454956, + "rewards_train/margins": 0.9526911973953247, + "rewards_train/rejected": -2.3499011993408203, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -2.6539878845214844, + "logps_train/ref_chosen": -1.7421875, + "logps_train/ref_rejected": -1.7421875, + "logps_train/rejected": -2.7030210494995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09118004143238068, + "rewards_train/margins": 0.004903316497802734, + "rewards_train/rejected": -0.09608335793018341, + "step": 1355 + }, + { + "epoch": 0.38, + "logps_train/chosen": -26.81398582458496, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -19.836572647094727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.681398630142212, + "rewards_train/margins": -1.0789913535118103, + "rewards_train/rejected": -0.6024072766304016, + "step": 1355 + }, + { + "epoch": 0.38, + "learning_rate": 9.424909973965538e-07, + "loss": 0.4479, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -66.29147338867188, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -111.64067077636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0291473865509033, + "rewards_train/margins": 2.809919595718384, + "rewards_train/rejected": -5.839066982269287, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -9.747797966003418, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -8.5091552734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5622798204421997, + "rewards_train/margins": -0.05980175733566284, + "rewards_train/rejected": -0.5024780631065369, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -94.75630950927734, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -208.27630615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5256309509277344, + "rewards_train/margins": 3.7019996643066406, + "rewards_train/rejected": -5.227630615234375, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -24.970928192138672, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -33.482261657714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.415842890739441, + "rewards_train/margins": 1.3448833227157593, + "rewards_train/rejected": -2.7607262134552, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -125.81715393066406, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -172.45635986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1317155361175537, + "rewards_train/margins": 3.563920736312866, + "rewards_train/rejected": -5.69563627243042, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.756420135498047, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -18.24826431274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7693920135498047, + "rewards_train/margins": 0.13668441772460938, + "rewards_train/rejected": -0.9060764312744141, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -112.673095703125, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -242.94483947753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6673096418380737, + "rewards_train/margins": 8.02717411518097, + "rewards_train/rejected": -9.694483757019043, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -71.2135009765625, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -96.80038452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.42864990234375, + "rewards_train/margins": 2.5586884021759033, + "rewards_train/rejected": -2.1300384998321533, + "step": 1356 + }, + { + "epoch": 0.38, + "logps_train/chosen": -156.463623046875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -212.3441162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.346362352371216, + "rewards_train/margins": 5.0880491733551025, + "rewards_train/rejected": -7.434411525726318, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -38.96711349487305, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -96.240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5717113614082336, + "rewards_train/margins": 2.9273120760917664, + "rewards_train/rejected": -3.4990234375, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -97.12678527832031, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -191.5440673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6126785278320312, + "rewards_train/margins": 7.991728782653809, + "rewards_train/rejected": -9.60440731048584, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -53.942771911621094, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -72.61083221435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5692772269248962, + "rewards_train/margins": 0.8418059945106506, + "rewards_train/rejected": -1.4110832214355469, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -85.3232421875, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -99.11160278320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.25732421875, + "rewards_train/margins": -0.2961639165878296, + "rewards_train/rejected": -1.9611603021621704, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -110.0016098022461, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -29.373197555541992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4501609802246094, + "rewards_train/margins": 0.2246587872505188, + "rewards_train/rejected": -0.6748197674751282, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -18.98175048828125, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -21.607614517211914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.029425024986267, + "rewards_train/margins": 0.6188364028930664, + "rewards_train/rejected": -1.6482614278793335, + "step": 1357 + }, + { + "epoch": 0.38, + "logps_train/chosen": -78.78709411621094, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -49.11589813232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6537094116210938, + "rewards_train/margins": 1.3578803539276123, + "rewards_train/rejected": -3.011589765548706, + "step": 1357 + }, + { + "epoch": 0.38, + "learning_rate": 9.398500284406063e-07, + "loss": 0.2674, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -71.03256225585938, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -63.082252502441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40325623750686646, + "rewards_train/margins": 0.12996900081634521, + "rewards_train/rejected": -0.5332252383232117, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.915870666503906, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -33.90336608886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2665871381759644, + "rewards_train/margins": 0.6112494468688965, + "rewards_train/rejected": -1.8778365850448608, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -56.862754821777344, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -127.42137145996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1112754344940186, + "rewards_train/margins": 3.580861806869507, + "rewards_train/rejected": -5.692137241363525, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -170.4820556640625, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -248.95602416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5482056140899658, + "rewards_train/margins": 10.047397375106812, + "rewards_train/rejected": -11.595602989196777, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -262.765380859375, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -222.3135528564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.97653865814209, + "rewards_train/margins": -0.8451833724975586, + "rewards_train/rejected": -8.131355285644531, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -101.01325988769531, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -181.94105529785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.751326084136963, + "rewards_train/margins": 3.7927794456481934, + "rewards_train/rejected": -6.544105529785156, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -54.245689392089844, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -74.37763977050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0995689630508423, + "rewards_train/margins": 1.7381950616836548, + "rewards_train/rejected": -2.837764024734497, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -155.0857391357422, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -193.22177124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0085740089416504, + "rewards_train/margins": 6.263603687286377, + "rewards_train/rejected": -9.272177696228027, + "step": 1358 + }, + { + "epoch": 0.38, + "logps_train/chosen": -161.67543029785156, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -174.16818237304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.567543029785156, + "rewards_train/margins": 0.1492757797241211, + "rewards_train/rejected": -8.716818809509277, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -165.3260955810547, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -137.6480712890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.132609844207764, + "rewards_train/margins": -1.367802619934082, + "rewards_train/rejected": -3.7648072242736816, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -18.733715057373047, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -26.37169647216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0296214818954468, + "rewards_train/margins": 0.2075481414794922, + "rewards_train/rejected": -1.237169623374939, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -147.2515411376953, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -222.7930450439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3251540660858154, + "rewards_train/margins": 4.254150629043579, + "rewards_train/rejected": -7.5793046951293945, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -165.36746215820312, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -125.15826416015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.0867462158203125, + "rewards_train/margins": -2.120919704437256, + "rewards_train/rejected": -2.9658265113830566, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -22.089937210083008, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -19.678173065185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8277437090873718, + "rewards_train/margins": 0.10882359743118286, + "rewards_train/rejected": -0.9365673065185547, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -21.301010131835938, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -37.19218444824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7676010131835938, + "rewards_train/margins": 1.4641175270080566, + "rewards_train/rejected": -2.2317185401916504, + "step": 1359 + }, + { + "epoch": 0.38, + "logps_train/chosen": -18.126907348632812, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -6.1766886711120605, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.512690782546997, + "rewards_train/margins": -1.2168969213962555, + "rewards_train/rejected": -0.2957938611507416, + "step": 1359 + }, + { + "epoch": 0.38, + "learning_rate": 9.372094804706866e-07, + "loss": 0.6165, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -95.97499084472656, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -134.78358459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.097499132156372, + "rewards_train/margins": 3.630859613418579, + "rewards_train/rejected": -4.728358745574951, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -0.8713188171386719, + "logps_train/ref_chosen": -0.1494140625, + "logps_train/ref_rejected": -0.1494140625, + "logps_train/rejected": -0.8441968560218811, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07219047844409943, + "rewards_train/margins": -0.0027121976017951965, + "rewards_train/rejected": -0.06947828084230423, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -161.69064331054688, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -192.34750366210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.319064617156982, + "rewards_train/margins": 0.51568603515625, + "rewards_train/rejected": -6.834750652313232, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -165.08279418945312, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -180.09750366210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6082794666290283, + "rewards_train/margins": 4.351471185684204, + "rewards_train/rejected": -6.959750652313232, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -44.992530822753906, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -57.577239990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5742530822753906, + "rewards_train/margins": 0.4334709644317627, + "rewards_train/rejected": -1.0077240467071533, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -128.7531280517578, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -164.51930236816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2253129482269287, + "rewards_train/margins": 3.626617193222046, + "rewards_train/rejected": -5.851930141448975, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -19.774206161499023, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -22.829151153564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7961706519126892, + "rewards_train/margins": -0.6007555276155472, + "rewards_train/rejected": -0.19541512429714203, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -81.9281005859375, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -148.9113311767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.542810082435608, + "rewards_train/margins": 6.048323035240173, + "rewards_train/rejected": -7.591133117675781, + "step": 1360 + }, + { + "epoch": 0.38, + "logps_train/chosen": -14.284466743469238, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -0.4765625, + "logps_train/rejected": -0.031139038503170013, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8346967101097107, + "rewards_train/margins": -0.8792390562593937, + "rewards_train/rejected": 0.044542346149683, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -36.59867858886719, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -33.11134719848633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5348678827285767, + "rewards_train/margins": -0.09873318672180176, + "rewards_train/rejected": -1.436134696006775, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -125.87548065185547, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -105.40034484863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.687548041343689, + "rewards_train/margins": 0.7524865865707397, + "rewards_train/rejected": -2.4400346279144287, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -23.062612533569336, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -54.663475036621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7093862295150757, + "rewards_train/margins": 2.7632113695144653, + "rewards_train/rejected": -4.472597599029541, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -107.93746948242188, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -143.41647338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.143747091293335, + "rewards_train/margins": 0.24790024757385254, + "rewards_train/rejected": -2.3916473388671875, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -152.3511962890625, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -156.65240478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.485119819641113, + "rewards_train/margins": 1.780120849609375, + "rewards_train/rejected": -6.265240669250488, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -29.118009567260742, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -74.33961486816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17430095374584198, + "rewards_train/margins": 0.40966053307056427, + "rewards_train/rejected": -0.5839614868164062, + "step": 1361 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.576271057128906, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -31.56035614013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1607521772384644, + "rewards_train/margins": 0.12028348445892334, + "rewards_train/rejected": -1.2810356616973877, + "step": 1361 + }, + { + "epoch": 0.38, + "learning_rate": 9.345693719678306e-07, + "loss": 0.4409, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -33.80320358276367, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -37.79061508178711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6959455013275146, + "rewards_train/margins": 0.34561610221862793, + "rewards_train/rejected": -3.0415616035461426, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -191.4169921875, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -160.450439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8416993618011475, + "rewards_train/margins": 1.9533445835113525, + "rewards_train/rejected": -5.7950439453125, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.253860473632812, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -48.15777587890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2472610473632812, + "rewards_train/margins": -0.08148348331451416, + "rewards_train/rejected": -1.165777564048767, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -25.752944946289062, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -20.016984939575195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2877944707870483, + "rewards_train/margins": -0.17359590530395508, + "rewards_train/rejected": -1.1141985654830933, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -80.88407897949219, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -158.27850341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1884078979492188, + "rewards_train/margins": 6.089442729949951, + "rewards_train/rejected": -7.27785062789917, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -117.45545959472656, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -132.55728149414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.545546054840088, + "rewards_train/margins": 1.8851823806762695, + "rewards_train/rejected": -7.430728435516357, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -231.15147399902344, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -203.81387329101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.91514778137207, + "rewards_train/margins": -2.133760452270508, + "rewards_train/rejected": -7.7813873291015625, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.063182830810547, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -30.650644302368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2188182920217514, + "rewards_train/margins": 0.8087461143732071, + "rewards_train/rejected": -1.0275644063949585, + "step": 1362 + }, + { + "epoch": 0.38, + "logps_train/chosen": -15.19955062866211, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -0.9375, + "logps_train/rejected": -21.150711059570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28245505690574646, + "rewards_train/margins": 1.738866001367569, + "rewards_train/rejected": -2.0213210582733154, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.847763061523438, + "logps_train/ref_chosen": -2.796875, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -38.831016540527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5050888061523438, + "rewards_train/margins": 1.8373878002166748, + "rewards_train/rejected": -3.3424766063690186, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -101.5218505859375, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -124.44189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.102185010910034, + "rewards_train/margins": 0.6920044422149658, + "rewards_train/rejected": -2.794189453125, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -214.66378784179688, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -216.21556091308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.8663787841796875, + "rewards_train/margins": 0.7551774978637695, + "rewards_train/rejected": -6.621556282043457, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -9.356903076171875, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -55.53240203857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5560027956962585, + "rewards_train/margins": 1.4222374558448792, + "rewards_train/rejected": -1.9782402515411377, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.017963409423828, + "logps_train/ref_chosen": -0.91015625, + "logps_train/ref_rejected": -0.91015625, + "logps_train/rejected": -15.302704811096191, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.510780692100525, + "rewards_train/margins": -0.07152581214904785, + "rewards_train/rejected": -1.439254879951477, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -137.545654296875, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -192.29959106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25456544756889343, + "rewards_train/margins": 4.775393754243851, + "rewards_train/rejected": -5.029959201812744, + "step": 1363 + }, + { + "epoch": 0.38, + "logps_train/chosen": -108.08158111572266, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -78.11393737792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3581581115722656, + "rewards_train/margins": 1.1032357215881348, + "rewards_train/rejected": -3.4613938331604004, + "step": 1363 + }, + { + "epoch": 0.38, + "learning_rate": 9.319297214099984e-07, + "loss": 0.4555, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -0.9210327863693237, + "logps_train/ref_chosen": -0.3671875, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -9.286497116088867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05538452789187431, + "rewards_train/margins": 0.38264020159840584, + "rewards_train/rejected": -0.43802472949028015, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -58.890995025634766, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -132.6893768310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8390995264053345, + "rewards_train/margins": 2.229838252067566, + "rewards_train/rejected": -4.0689377784729, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -24.89228630065918, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -37.692745208740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.776728630065918, + "rewards_train/margins": 1.214421033859253, + "rewards_train/rejected": -2.991149663925171, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -14.398858070373535, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -25.435771942138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6461358070373535, + "rewards_train/margins": 0.6161913871765137, + "rewards_train/rejected": -1.2623271942138672, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -128.43624877929688, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -158.9212646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7436249256134033, + "rewards_train/margins": 3.4985015392303467, + "rewards_train/rejected": -6.24212646484375, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -51.593238830566406, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -73.70172119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1093238815665245, + "rewards_train/margins": 2.8358481898903847, + "rewards_train/rejected": -2.945172071456909, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -29.095924377441406, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -30.5, + "logps_train/rejected": -63.78050994873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5283424854278564, + "rewards_train/margins": 1.799708604812622, + "rewards_train/rejected": -3.3280510902404785, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -32.51901626586914, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -30.762537002563477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4331517219543457, + "rewards_train/margins": 0.06497693061828613, + "rewards_train/rejected": -2.498128652572632, + "step": 1364 + }, + { + "epoch": 0.38, + "logps_train/chosen": -7.170039653778076, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -28.7595272064209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2888789772987366, + "rewards_train/margins": 1.193323791027069, + "rewards_train/rejected": -1.4822027683258057, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -28.168607711791992, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -59.0, + "logps_train/rejected": -71.03642272949219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4043607711791992, + "rewards_train/margins": -0.20071852207183838, + "rewards_train/rejected": -1.2036422491073608, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -155.37147521972656, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -114.73989868164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9371475577354431, + "rewards_train/margins": 0.6868423819541931, + "rewards_train/rejected": -1.6239899396896362, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -147.76731872558594, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -93.7799072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0767319202423096, + "rewards_train/margins": 1.451258897781372, + "rewards_train/rejected": -2.5279908180236816, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -128.03567504882812, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -243.8966064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2035675048828125, + "rewards_train/margins": 6.786093711853027, + "rewards_train/rejected": -8.98966121673584, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -122.58582305908203, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -187.81753540039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.908582329750061, + "rewards_train/margins": 2.473171353340149, + "rewards_train/rejected": -3.38175368309021, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -14.921196937561035, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -0.515625, + "logps_train/rejected": -7.510332107543945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2077447175979614, + "rewards_train/margins": -0.5082740187644958, + "rewards_train/rejected": -0.6994706988334656, + "step": 1365 + }, + { + "epoch": 0.38, + "logps_train/chosen": -57.967742919921875, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -234.41835021972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.296774387359619, + "rewards_train/margins": 8.795060634613037, + "rewards_train/rejected": -11.091835021972656, + "step": 1365 + }, + { + "epoch": 0.38, + "learning_rate": 9.292905472719452e-07, + "loss": 0.3103, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -156.66676330566406, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -224.98495483398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2666763067245483, + "rewards_train/margins": 6.531819462776184, + "rewards_train/rejected": -7.798495769500732, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -110.07695007324219, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -191.58828735351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.007694959640503, + "rewards_train/margins": 6.201133966445923, + "rewards_train/rejected": -9.208828926086426, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -22.724411010742188, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -40.90843200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6974411010742188, + "rewards_train/margins": 2.784027099609375, + "rewards_train/rejected": -3.4814682006835938, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -81.80811309814453, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -142.27621459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23081131279468536, + "rewards_train/margins": 2.6468100994825363, + "rewards_train/rejected": -2.8776214122772217, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -88.16523742675781, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -114.40628051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.316523790359497, + "rewards_train/margins": 1.274104356765747, + "rewards_train/rejected": -2.590628147125244, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -143.51210021972656, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -246.98011779785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.101210117340088, + "rewards_train/margins": 5.796802043914795, + "rewards_train/rejected": -11.898012161254883, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -19.421127319335938, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -54.8725471496582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4858627319335938, + "rewards_train/margins": 2.126392126083374, + "rewards_train/rejected": -3.6122548580169678, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -23.889989852905273, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -34.42232894897461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3264989852905273, + "rewards_train/margins": 1.7219839096069336, + "rewards_train/rejected": -3.048482894897461, + "step": 1366 + }, + { + "epoch": 0.38, + "logps_train/chosen": -138.33026123046875, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -251.03277587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4330261945724487, + "rewards_train/margins": 4.970251679420471, + "rewards_train/rejected": -6.40327787399292, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -115.47264099121094, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -185.41656494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7972640991210938, + "rewards_train/margins": 5.5943922996521, + "rewards_train/rejected": -7.391656398773193, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -160.64324951171875, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -241.3026885986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.064324975013733, + "rewards_train/margins": 5.865943789482117, + "rewards_train/rejected": -6.93026876449585, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -44.86102294921875, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -54.00776672363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08610229939222336, + "rewards_train/margins": 3.720924325287342, + "rewards_train/rejected": -3.8070266246795654, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -114.24501037597656, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -168.08627319335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9245010614395142, + "rewards_train/margins": 4.934126257896423, + "rewards_train/rejected": -6.8586273193359375, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -10.012777328491211, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -8.618263244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.27372226119041443, + "rewards_train/margins": 0.8339860737323761, + "rewards_train/rejected": -0.5602638125419617, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -29.96798324584961, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -43.31922149658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09679832309484482, + "rewards_train/margins": 2.1726239696145058, + "rewards_train/rejected": -2.2694222927093506, + "step": 1367 + }, + { + "epoch": 0.38, + "logps_train/chosen": -32.86295700073242, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -25.75457191467285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4137043058872223, + "rewards_train/margins": 1.7516615688800812, + "rewards_train/rejected": -1.3379572629928589, + "step": 1367 + }, + { + "epoch": 0.38, + "learning_rate": 9.266518680250926e-07, + "loss": 0.0832, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -30.14188003540039, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -30.25980567932129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.914188027381897, + "rewards_train/margins": 0.2367926836013794, + "rewards_train/rejected": -2.1509807109832764, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -136.98764038085938, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -202.9281005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.148764133453369, + "rewards_train/margins": 4.344046115875244, + "rewards_train/rejected": -6.492810249328613, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -38.11739730834961, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -48.383113861083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4367398023605347, + "rewards_train/margins": 1.0390716791152954, + "rewards_train/rejected": -2.47581148147583, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -95.91165161132812, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -82.60698699951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4411652088165283, + "rewards_train/margins": 0.16953349113464355, + "rewards_train/rejected": -1.6106986999511719, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -110.83067321777344, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -111.48973846435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9330673217773438, + "rewards_train/margins": 0.06590652465820312, + "rewards_train/rejected": -1.9989738464355469, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -51.88570022583008, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -86.06893157958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1885700225830078, + "rewards_train/margins": 0.8183231353759766, + "rewards_train/rejected": -1.0068931579589844, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -147.18048095703125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -240.34320068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.518048048019409, + "rewards_train/margins": 6.416272401809692, + "rewards_train/rejected": -8.934320449829102, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -71.92707824707031, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -59.99552917480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.917707920074463, + "rewards_train/margins": -0.6681549549102783, + "rewards_train/rejected": -3.2495529651641846, + "step": 1368 + }, + { + "epoch": 0.38, + "logps_train/chosen": -46.33268356323242, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -259.6430358886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3832683563232422, + "rewards_train/margins": 10.781035423278809, + "rewards_train/rejected": -12.16430377960205, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -34.94200134277344, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -46.76869201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7317001819610596, + "rewards_train/margins": 2.44516921043396, + "rewards_train/rejected": -4.1768693923950195, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.054746627807617, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -36.472686767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15547466278076172, + "rewards_train/margins": 0.8042940497398376, + "rewards_train/rejected": -0.9597687125205994, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -31.265451431274414, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -22.614532470703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8015451431274414, + "rewards_train/margins": -1.0650919079780579, + "rewards_train/rejected": -0.7364532351493835, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -32.61294174194336, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -46.91248321533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0362942218780518, + "rewards_train/margins": 2.236204147338867, + "rewards_train/rejected": -3.272498369216919, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -15.334287643432617, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -21.098018646240234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5084287524223328, + "rewards_train/margins": -0.011126875877380371, + "rewards_train/rejected": -0.4973018765449524, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -96.3403091430664, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -42.439151763916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0340309143066406, + "rewards_train/margins": 1.5223844051361084, + "rewards_train/rejected": -2.556415319442749, + "step": 1369 + }, + { + "epoch": 0.38, + "logps_train/chosen": -151.56137084960938, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -207.54141235351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.506137371063232, + "rewards_train/margins": 1.548004150390625, + "rewards_train/rejected": -6.054141521453857, + "step": 1369 + }, + { + "epoch": 0.38, + "learning_rate": 9.240137021373968e-07, + "loss": 0.4141, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -131.45912170410156, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -102.8829345703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9959123134613037, + "rewards_train/margins": -0.8576188087463379, + "rewards_train/rejected": -3.138293504714966, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -85.1207046508789, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -85.48143005371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5620704889297485, + "rewards_train/margins": 1.4860726594924927, + "rewards_train/rejected": -2.048143148422241, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -3.6059134006500244, + "logps_train/ref_chosen": -2.5625, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -21.322158813476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10434134304523468, + "rewards_train/margins": 0.8528745621442795, + "rewards_train/rejected": -0.9572159051895142, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -23.61757469177246, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -19.912343978881836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.096132516860962, + "rewards_train/margins": -0.5298980474472046, + "rewards_train/rejected": -1.5662344694137573, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -0.20095030963420868, + "logps_train/ref_chosen": -0.1298828125, + "logps_train/ref_rejected": -0.1298828125, + "logps_train/rejected": -0.2159857600927353, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007106749806553125, + "rewards_train/margins": 0.0015035453252494335, + "rewards_train/rejected": -0.008610295131802559, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -117.32988739013672, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -151.9091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3329887390136719, + "rewards_train/margins": 3.7579293251037598, + "rewards_train/rejected": -5.090918064117432, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -59.042388916015625, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -129.02748107910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5792388916015625, + "rewards_train/margins": 0.4235093593597412, + "rewards_train/rejected": -3.0027482509613037, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -292.1120910644531, + "logps_train/ref_chosen": -205.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -115.89901733398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.711209297180176, + "rewards_train/margins": -4.7213075160980225, + "rewards_train/rejected": -3.9899017810821533, + "step": 1370 + }, + { + "epoch": 0.38, + "logps_train/chosen": -17.3614444732666, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -28.793468475341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1267694234848022, + "rewards_train/margins": 0.48382747173309326, + "rewards_train/rejected": -1.6105968952178955, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -6.334837436676025, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -26.22280502319336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.355358749628067, + "rewards_train/margins": 0.3669217526912689, + "rewards_train/rejected": -0.7222805023193359, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -32.779850006103516, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -0.388671875, + "logps_train/rejected": -20.492630004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2154849767684937, + "rewards_train/margins": 0.7949107885360718, + "rewards_train/rejected": -2.0103957653045654, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -174.88360595703125, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -203.61036682128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.888360500335693, + "rewards_train/margins": 4.072676181793213, + "rewards_train/rejected": -8.961036682128906, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -161.78805541992188, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -178.95187377929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.7288055419921875, + "rewards_train/margins": -0.5336179733276367, + "rewards_train/rejected": -4.195187568664551, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -12.554274559020996, + "logps_train/ref_chosen": -1.3984375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -14.167784690856934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1155837774276733, + "rewards_train/margins": -0.34880530834198, + "rewards_train/rejected": -0.7667784690856934, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -47.046722412109375, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -73.55908966064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9796722531318665, + "rewards_train/margins": 2.8012368083000183, + "rewards_train/rejected": -3.7809090614318848, + "step": 1371 + }, + { + "epoch": 0.38, + "logps_train/chosen": -86.41437530517578, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -202.6862030029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2414376735687256, + "rewards_train/margins": 7.0271828174591064, + "rewards_train/rejected": -9.268620491027832, + "step": 1371 + }, + { + "epoch": 0.38, + "learning_rate": 9.213760680732226e-07, + "loss": 0.7535, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -92.23211669921875, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -94.95152282714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.773211658000946, + "rewards_train/margins": 0.07194066047668457, + "rewards_train/rejected": -0.8451523184776306, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -78.07940673828125, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -90.03660583496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7579407691955566, + "rewards_train/margins": 0.7707197666168213, + "rewards_train/rejected": -3.528660535812378, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -27.25927734375, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -30.08728790283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.350927710533142, + "rewards_train/margins": 0.22655105590820312, + "rewards_train/rejected": -1.5774787664413452, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -44.38349914550781, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -48.807159423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3383500576019287, + "rewards_train/margins": 0.3173658847808838, + "rewards_train/rejected": -2.6557159423828125, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -105.24481964111328, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -135.17807006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2244819700717926, + "rewards_train/margins": 3.3933251798152924, + "rewards_train/rejected": -3.617807149887085, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -18.411502838134766, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -37.84781265258789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7724003195762634, + "rewards_train/margins": 1.5811310410499573, + "rewards_train/rejected": -2.3535313606262207, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -8.573524475097656, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -1.1953125, + "logps_train/rejected": -1.6372157335281372, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5714149475097656, + "rewards_train/margins": -0.5272246226668358, + "rewards_train/rejected": -0.04419032484292984, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -161.31427001953125, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -122.27996826171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.381427049636841, + "rewards_train/margins": -0.20343017578125, + "rewards_train/rejected": -3.177996873855591, + "step": 1372 + }, + { + "epoch": 0.38, + "logps_train/chosen": -106.68817138671875, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -202.22097778320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4188172817230225, + "rewards_train/margins": 5.353280782699585, + "rewards_train/rejected": -7.772098064422607, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -35.557289123535156, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -46.14870834350586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8057289123535156, + "rewards_train/margins": -0.5908581018447876, + "rewards_train/rejected": -1.214870810508728, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -240.34426879882812, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -187.14353942871094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.8344268798828125, + "rewards_train/margins": -2.0200729370117188, + "rewards_train/rejected": -5.814353942871094, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -84.74491119384766, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -209.24278259277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3744910955429077, + "rewards_train/margins": 8.8497873544693, + "rewards_train/rejected": -10.224278450012207, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -21.920576095581055, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -39.88227844238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2295576333999634, + "rewards_train/margins": 1.8149203062057495, + "rewards_train/rejected": -3.044477939605713, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -104.91012573242188, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -139.49630737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6910126209259033, + "rewards_train/margins": 0.9586184024810791, + "rewards_train/rejected": -4.649631023406982, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -10.175771713256836, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -28.563453674316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6753897070884705, + "rewards_train/margins": 1.5840807557106018, + "rewards_train/rejected": -2.2594704627990723, + "step": 1373 + }, + { + "epoch": 0.38, + "logps_train/chosen": -110.78853607177734, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -171.18772888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5288535952568054, + "rewards_train/margins": 6.889919579029083, + "rewards_train/rejected": -7.418773174285889, + "step": 1373 + }, + { + "epoch": 0.38, + "learning_rate": 9.187389842932111e-07, + "loss": 0.5017, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -43.948699951171875, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -87.82803344726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21987000107765198, + "rewards_train/margins": 0.46293333172798157, + "rewards_train/rejected": -0.6828033328056335, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -44.973426818847656, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -58.199493408203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0723427534103394, + "rewards_train/margins": -0.052393436431884766, + "rewards_train/rejected": -1.0199493169784546, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -81.59076690673828, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -165.600341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9090766906738281, + "rewards_train/margins": 0.5509575605392456, + "rewards_train/rejected": -1.4600342512130737, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -245.86087036132812, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -211.45278930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.386086940765381, + "rewards_train/margins": 0.25919198989868164, + "rewards_train/rejected": -4.6452789306640625, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -12.069781303405762, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -21.600627899169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04927187040448189, + "rewards_train/margins": 1.7749597318470478, + "rewards_train/rejected": -1.725687861442566, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -75.57102966308594, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -65.94920349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3571029603481293, + "rewards_train/margins": 0.0128173828125, + "rewards_train/rejected": -0.3699203431606293, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -112.34161376953125, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -149.57595825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.984161376953125, + "rewards_train/margins": 2.773434638977051, + "rewards_train/rejected": -4.757596015930176, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -21.202316284179688, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -2.765625, + "logps_train/rejected": -5.713591575622559, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4702316224575043, + "rewards_train/margins": -0.17543494701385498, + "rewards_train/rejected": -0.2947966754436493, + "step": 1374 + }, + { + "epoch": 0.38, + "logps_train/chosen": -167.22610473632812, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -225.28985595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.422610759735107, + "rewards_train/margins": 3.4563746452331543, + "rewards_train/rejected": -10.878985404968262, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -16.003875732421875, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -47.47577667236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9347625970840454, + "rewards_train/margins": 2.50656521320343, + "rewards_train/rejected": -3.4413278102874756, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -83.92919921875, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -83.4000015258789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8429199457168579, + "rewards_train/margins": -0.05291980504989624, + "rewards_train/rejected": -0.7900001406669617, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -203.40115356445312, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -200.90769958496094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.840115547180176, + "rewards_train/margins": -1.5493454933166504, + "rewards_train/rejected": -4.290770053863525, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -175.34860229492188, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -206.08848571777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5348602533340454, + "rewards_train/margins": 3.0739883184432983, + "rewards_train/rejected": -4.608848571777344, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -122.16885375976562, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -230.1539306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4668853282928467, + "rewards_train/margins": 7.798507928848267, + "rewards_train/rejected": -11.265393257141113, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -0.8976350426673889, + "logps_train/ref_chosen": -1.7421875, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -9.702569961547852, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08445524424314499, + "rewards_train/margins": 0.5672122463583946, + "rewards_train/rejected": -0.48275700211524963, + "step": 1375 + }, + { + "epoch": 0.38, + "logps_train/chosen": -174.88751220703125, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -189.25875854492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.938751220703125, + "rewards_train/margins": 0.08712482452392578, + "rewards_train/rejected": -6.025876045227051, + "step": 1375 + }, + { + "epoch": 0.38, + "learning_rate": 9.161024692541535e-07, + "loss": 0.4774, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -116.21302032470703, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -115.9565658569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17130203545093536, + "rewards_train/margins": 0.024354547262191772, + "rewards_train/rejected": -0.19565658271312714, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -97.49409484863281, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -122.99507141113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5005905032157898, + "rewards_train/margins": 1.250097632408142, + "rewards_train/rejected": -0.7495071291923523, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -24.602821350097656, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -46.384796142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3977822065353394, + "rewards_train/margins": 1.228197455406189, + "rewards_train/rejected": -2.6259796619415283, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -128.7040252685547, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -151.56924438476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.720402479171753, + "rewards_train/margins": -0.06347799301147461, + "rewards_train/rejected": -2.6569244861602783, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -110.55345153808594, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -110.39142608642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.04465484619140625, + "rewards_train/margins": -0.016202546656131744, + "rewards_train/rejected": 0.060857392847537994, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -87.16610717773438, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -97.44703674316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.8333892822265625, + "rewards_train/margins": 0.6780929565429688, + "rewards_train/rejected": 0.15529632568359375, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -14.05482292175293, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -14.245526313781738, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16173230111598969, + "rewards_train/margins": 0.662820354104042, + "rewards_train/rejected": -0.8245526552200317, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -137.54946899414062, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -207.8194122314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.154946804046631, + "rewards_train/margins": 3.0269947052001953, + "rewards_train/rejected": -7.181941509246826, + "step": 1376 + }, + { + "epoch": 0.38, + "logps_train/chosen": -20.200918197631836, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -27.611297607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1763418912887573, + "rewards_train/margins": 0.697287917137146, + "rewards_train/rejected": -1.8736298084259033, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -49.605567932128906, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -24.111225128173828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9855567812919617, + "rewards_train/margins": -0.1994342803955078, + "rewards_train/rejected": -0.7861225008964539, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -28.709638595581055, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -50.87620162963867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1053388118743896, + "rewards_train/margins": 0.8572814464569092, + "rewards_train/rejected": -2.962620258331299, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -3.760232925415039, + "logps_train/ref_chosen": -1.1015625, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -19.872648239135742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26586705446243286, + "rewards_train/margins": 0.1276477873325348, + "rewards_train/rejected": -0.39351484179496765, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -15.72293758392334, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -40.94559097290039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37854376435279846, + "rewards_train/margins": 1.4160153567790985, + "rewards_train/rejected": -1.794559121131897, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -203.03404235839844, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -169.7803955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.703404426574707, + "rewards_train/margins": 0.07463502883911133, + "rewards_train/rejected": -5.778039455413818, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -65.66998291015625, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -93.42343139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3669984340667725, + "rewards_train/margins": 1.3503446578979492, + "rewards_train/rejected": -3.7173430919647217, + "step": 1377 + }, + { + "epoch": 0.38, + "logps_train/chosen": -1.6930956840515137, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -4.019115447998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09944043308496475, + "rewards_train/margins": 0.17010197788476944, + "rewards_train/rejected": -0.07066154479980469, + "step": 1377 + }, + { + "epoch": 0.39, + "learning_rate": 9.134665414088596e-07, + "loss": 0.462, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -120.25750732421875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -237.63052368164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.725750744342804, + "rewards_train/margins": 5.6373016238212585, + "rewards_train/rejected": -6.3630523681640625, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -75.16349029541016, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -67.60232543945312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.291349172592163, + "rewards_train/margins": -0.6561166048049927, + "rewards_train/rejected": -1.6352325677871704, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -81.99041748046875, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -127.78553771972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.349041700363159, + "rewards_train/margins": 2.9295122623443604, + "rewards_train/rejected": -5.2785539627075195, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -81.62518310546875, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -105.06929016113281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.187518358230591, + "rewards_train/margins": -0.03058934211730957, + "rewards_train/rejected": -2.1569290161132812, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -64.64883422851562, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -63.969451904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7398834228515625, + "rewards_train/margins": 0.15706175565719604, + "rewards_train/rejected": -0.8969451785087585, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -145.3827362060547, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -174.315673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2382736206054688, + "rewards_train/margins": 1.9932937622070312, + "rewards_train/rejected": -3.2315673828125, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -27.982398986816406, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -17.5581111907959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9482399225234985, + "rewards_train/margins": 0.35757124423980713, + "rewards_train/rejected": -1.3058111667633057, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -85.84382629394531, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -23.985904693603516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4843826293945312, + "rewards_train/margins": -1.2920421361923218, + "rewards_train/rejected": -1.1923404932022095, + "step": 1378 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.059850215911865, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -16.299701690673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.35651499032974243, + "rewards_train/margins": 1.3927351832389832, + "rewards_train/rejected": -1.0362201929092407, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -123.90801239013672, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -137.1567840576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2908012866973877, + "rewards_train/margins": 2.224877119064331, + "rewards_train/rejected": -3.5156784057617188, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -43.747093200683594, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -66.20594024658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4247093200683594, + "rewards_train/margins": 2.183384895324707, + "rewards_train/rejected": -4.608094215393066, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -101.18717956542969, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -81.60160827636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7687179446220398, + "rewards_train/margins": 0.24144285917282104, + "rewards_train/rejected": -1.0101608037948608, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -68.5421142578125, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -79.65950012207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.37921142578125, + "rewards_train/margins": 0.23673856258392334, + "rewards_train/rejected": -1.6159499883651733, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.839840888977051, + "logps_train/ref_chosen": -0.34765625, + "logps_train/ref_rejected": -0.34765625, + "logps_train/rejected": -6.781663417816162, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6492184996604919, + "rewards_train/margins": -0.005817770957946777, + "rewards_train/rejected": -0.6434007287025452, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -1.4868855476379395, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -9.215895652770996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15912394225597382, + "rewards_train/margins": 0.3682135045528412, + "rewards_train/rejected": -0.20908956229686737, + "step": 1379 + }, + { + "epoch": 0.39, + "logps_train/chosen": -57.008296966552734, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -107.20344543457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0008296966552734, + "rewards_train/margins": 1.9195148944854736, + "rewards_train/rejected": -3.920344591140747, + "step": 1379 + }, + { + "epoch": 0.39, + "learning_rate": 9.108312192060296e-07, + "loss": 0.4747, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -151.38253784179688, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -161.57308959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.138253927230835, + "rewards_train/margins": 2.969054937362671, + "rewards_train/rejected": -6.107308864593506, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -0.26807209849357605, + "logps_train/ref_chosen": -0.55859375, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -17.027664184570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029052166268229485, + "rewards_train/margins": 1.1849435847252607, + "rewards_train/rejected": -1.1558914184570312, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -15.071749687194824, + "logps_train/ref_chosen": -1.1171875, + "logps_train/ref_rejected": -1.1171875, + "logps_train/rejected": -15.067488670349121, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3954561948776245, + "rewards_train/margins": -0.0004260540008544922, + "rewards_train/rejected": -1.39503014087677, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -38.22322082519531, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -54.01613235473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1973220854997635, + "rewards_train/margins": 3.0667911022901535, + "rewards_train/rejected": -3.264113187789917, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -338.359375, + "logps_train/ref_chosen": -225.0, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -295.881103515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.3359375, + "rewards_train/margins": -1.4478273391723633, + "rewards_train/rejected": -9.888110160827637, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -121.81016540527344, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -208.83926391601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3189834654331207, + "rewards_train/margins": 8.90290966629982, + "rewards_train/rejected": -8.5839262008667, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -92.69757080078125, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -166.36392211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2697571516036987, + "rewards_train/margins": 5.966635346412659, + "rewards_train/rejected": -7.236392498016357, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -118.71847534179688, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -266.41326904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.571847677230835, + "rewards_train/margins": 8.269479036331177, + "rewards_train/rejected": -11.841326713562012, + "step": 1380 + }, + { + "epoch": 0.39, + "logps_train/chosen": -66.07862091064453, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -66.28486633300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.982862114906311, + "rewards_train/margins": 0.020624518394470215, + "rewards_train/rejected": -1.0034866333007812, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": -179.60621643066406, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -132.15162658691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9606215953826904, + "rewards_train/margins": -0.3954589366912842, + "rewards_train/rejected": -3.5651626586914062, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": -33.57953643798828, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -116.8046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.507953643798828, + "rewards_train/margins": -0.027484893798828125, + "rewards_train/rejected": -2.48046875, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": -144.8872833251953, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -169.10992431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4387283325195312, + "rewards_train/margins": 3.272264003753662, + "rewards_train/rejected": -5.710992336273193, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": -81.51136779785156, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -50.925655364990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00113677978515625, + "rewards_train/margins": 3.6101787090301514, + "rewards_train/rejected": -3.6113154888153076, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": -78.99067687988281, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -194.22543334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7490676641464233, + "rewards_train/margins": 2.4734755754470825, + "rewards_train/rejected": -4.222543239593506, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": 0.0, + "logps_train/ref_chosen": 0.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -212.714599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0, + "rewards_train/margins": 4.571460247039795, + "rewards_train/rejected": -4.571460247039795, + "step": 1381 + }, + { + "epoch": 0.39, + "logps_train/chosen": -68.10986328125, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -108.04071044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3609863221645355, + "rewards_train/margins": 2.0430847704410553, + "rewards_train/rejected": -2.404071092605591, + "step": 1381 + }, + { + "epoch": 0.39, + "learning_rate": 9.081965210901244e-07, + "loss": 0.331, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -1.7644439935684204, + "logps_train/ref_chosen": -0.86328125, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -8.188151359558105, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09011627733707428, + "rewards_train/margins": 0.03807386755943298, + "rewards_train/rejected": -0.12819014489650726, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -119.1571044921875, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -200.5888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.265710473060608, + "rewards_train/margins": 6.643176436424255, + "rewards_train/rejected": -7.908886909484863, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -7.340872287750244, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -56.841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009662771597504616, + "rewards_train/margins": 3.9813424590975046, + "rewards_train/rejected": -3.9716796875, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -20.487581253051758, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -32.03288269042969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4550081491470337, + "rewards_train/margins": -0.36421990394592285, + "rewards_train/rejected": -1.0907882452011108, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -143.72222900390625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -137.66732788085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.872222900390625, + "rewards_train/margins": 1.2945098876953125, + "rewards_train/rejected": -2.1667327880859375, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -107.5814437866211, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -158.0755157470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0081443786621094, + "rewards_train/margins": 3.6994071006774902, + "rewards_train/rejected": -5.7075514793396, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -10.818979263305664, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -23.486495971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6990854144096375, + "rewards_train/margins": 0.8589392304420471, + "rewards_train/rejected": -1.5580246448516846, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -72.59664916992188, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -130.02552795410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0096648931503296, + "rewards_train/margins": 1.6428879499435425, + "rewards_train/rejected": -2.652552843093872, + "step": 1382 + }, + { + "epoch": 0.39, + "logps_train/chosen": -21.45703125, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -58.52452850341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.626953125, + "rewards_train/margins": 2.000499725341797, + "rewards_train/rejected": -2.627452850341797, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -11.310012817382812, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -26.595184326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17524872720241547, + "rewards_train/margins": 2.491017207503319, + "rewards_train/rejected": -2.3157684803009033, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -46.6097526550293, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -25.969331741333008, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9234752655029297, + "rewards_train/margins": -0.8577920198440552, + "rewards_train/rejected": -1.0656832456588745, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -20.93062973022461, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -23.315704345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.502437949180603, + "rewards_train/margins": 0.4541325569152832, + "rewards_train/rejected": -1.9565705060958862, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -121.69911193847656, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -199.39801025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13008880615234375, + "rewards_train/margins": 3.9698898792266846, + "rewards_train/rejected": -3.839801073074341, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -223.42445373535156, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -195.94656372070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.242445468902588, + "rewards_train/margins": -0.5477890968322754, + "rewards_train/rejected": -6.6946563720703125, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -174.30715942382812, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -205.9482879638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2307159900665283, + "rewards_train/margins": 5.764112710952759, + "rewards_train/rejected": -6.994828701019287, + "step": 1383 + }, + { + "epoch": 0.39, + "logps_train/chosen": -5.625621795654297, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -12.716914176940918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20006218552589417, + "rewards_train/margins": 0.43100425601005554, + "rewards_train/rejected": -0.6310664415359497, + "step": 1383 + }, + { + "epoch": 0.39, + "learning_rate": 9.055624655012381e-07, + "loss": 0.3635, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -98.84185028076172, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -180.8148193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.26581498980522156, + "rewards_train/margins": 5.5472972095012665, + "rewards_train/rejected": -5.281482219696045, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -20.535968780517578, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -21.719383239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8590656518936157, + "rewards_train/margins": 0.03943514823913574, + "rewards_train/rejected": -1.8985008001327515, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -58.76874923706055, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -51.4927864074707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.895625114440918, + "rewards_train/margins": -1.2713463306427002, + "rewards_train/rejected": -3.6242787837982178, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -165.9725799560547, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -187.552001953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3972580432891846, + "rewards_train/margins": -0.9420578479766846, + "rewards_train/rejected": -2.4552001953125, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -215.4378204345703, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -279.7324523925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.943782091140747, + "rewards_train/margins": 7.829463720321655, + "rewards_train/rejected": -10.773245811462402, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -25.56682014465332, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -1.8203125, + "logps_train/rejected": -17.085304260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46918201446533203, + "rewards_train/margins": 1.0573171377182007, + "rewards_train/rejected": -1.5264991521835327, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -15.547550201416016, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -34.62379837036133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12350501865148544, + "rewards_train/margins": 2.866999961435795, + "rewards_train/rejected": -2.9905049800872803, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -159.0921173095703, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -159.176025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.509212017059326, + "rewards_train/margins": 0.008390426635742188, + "rewards_train/rejected": -4.517602443695068, + "step": 1384 + }, + { + "epoch": 0.39, + "logps_train/chosen": -148.09327697753906, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -102.9105224609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.159327745437622, + "rewards_train/margins": -2.11827552318573, + "rewards_train/rejected": -1.041052222251892, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -31.932373046875, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -45.72417068481445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6182373762130737, + "rewards_train/margins": 1.2041796445846558, + "rewards_train/rejected": -2.8224170207977295, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -3.2108850479125977, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -0.279296875, + "logps_train/rejected": -0.06730815023183823, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.078901007771492, + "rewards_train/margins": -0.10009988024830818, + "rewards_train/rejected": 0.021198872476816177, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -92.59837341308594, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -193.15492248535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2598373889923096, + "rewards_train/margins": 4.855654954910278, + "rewards_train/rejected": -7.115492343902588, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -34.23921585083008, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -45.02658462524414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.150484085083008, + "rewards_train/margins": -0.3978254795074463, + "rewards_train/rejected": -2.7526586055755615, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -5.2528862953186035, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -13.63718318939209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37138238549232483, + "rewards_train/margins": 0.19233593344688416, + "rewards_train/rejected": -0.563718318939209, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -30.03923988342285, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -7.708369255065918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.947674036026001, + "rewards_train/margins": -1.733087107539177, + "rewards_train/rejected": -0.21458692848682404, + "step": 1385 + }, + { + "epoch": 0.39, + "logps_train/chosen": -123.72027587890625, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -171.64320373535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.522027611732483, + "rewards_train/margins": 3.3422926664352417, + "rewards_train/rejected": -4.864320278167725, + "step": 1385 + }, + { + "epoch": 0.39, + "learning_rate": 9.029290708749668e-07, + "loss": 0.7001, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -62.196678161621094, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -102.08148193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.419667959213257, + "rewards_train/margins": 4.11348032951355, + "rewards_train/rejected": -6.533148288726807, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.891332626342773, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -11.083698272705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2938207685947418, + "rewards_train/margins": 0.34892407059669495, + "rewards_train/rejected": -0.6427448391914368, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -22.24139976501465, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -27.464733123779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3116399943828583, + "rewards_train/margins": 1.787958413362503, + "rewards_train/rejected": -2.0995984077453613, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -25.19211196899414, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -20.306127548217773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2004612684249878, + "rewards_train/margins": -0.13859844207763672, + "rewards_train/rejected": -1.061862826347351, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -122.3128433227539, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -145.4180450439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0312843322753906, + "rewards_train/margins": 2.660520076751709, + "rewards_train/rejected": -4.6918044090271, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -78.44540405273438, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -132.70147705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6445404291152954, + "rewards_train/margins": 1.4256073236465454, + "rewards_train/rejected": -2.070147752761841, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -11.972179412841797, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -42.49016571044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7315929532051086, + "rewards_train/margins": 1.1549236178398132, + "rewards_train/rejected": -1.8865165710449219, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -8.377306938171387, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -37.98752212524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5767931938171387, + "rewards_train/margins": 1.9782090187072754, + "rewards_train/rejected": -2.555002212524414, + "step": 1386 + }, + { + "epoch": 0.39, + "logps_train/chosen": -14.174236297607422, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -43.395809173583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0017986297607422, + "rewards_train/margins": 2.0940322875976562, + "rewards_train/rejected": -3.0958309173583984, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -53.98472213745117, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -126.5477294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.260972261428833, + "rewards_train/margins": 3.9938008785247803, + "rewards_train/rejected": -6.254773139953613, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -123.46597290039062, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -209.12149047851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7465972900390625, + "rewards_train/margins": 5.0655517578125, + "rewards_train/rejected": -6.8121490478515625, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -45.86914825439453, + "logps_train/ref_chosen": -30.25, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -42.70423126220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5619148015975952, + "rewards_train/margins": 1.980383276939392, + "rewards_train/rejected": -3.5422980785369873, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -67.22454833984375, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -143.27989196777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22754517197608948, + "rewards_train/margins": 3.855534464120865, + "rewards_train/rejected": -3.6279892921447754, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -104.21812438964844, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -298.10650634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.321812391281128, + "rewards_train/margins": 10.888838052749634, + "rewards_train/rejected": -14.210650444030762, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -18.48183250427246, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -32.95246124267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6231832504272461, + "rewards_train/margins": 0.609562873840332, + "rewards_train/rejected": -1.2327461242675781, + "step": 1387 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.113550186157227, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -28.60557746887207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3379175364971161, + "rewards_train/margins": 1.316390186548233, + "rewards_train/rejected": -1.6543077230453491, + "step": 1387 + }, + { + "epoch": 0.39, + "learning_rate": 9.002963556422807e-07, + "loss": 0.1949, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -30.44537353515625, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -27.5411376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.507037341594696, + "rewards_train/margins": 1.3783264756202698, + "rewards_train/rejected": -1.8853638172149658, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -19.14677619934082, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -34.04961395263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.570927619934082, + "rewards_train/margins": 1.552783727645874, + "rewards_train/rejected": -3.123711347579956, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -20.019298553466797, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -25.85814094543457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5175548791885376, + "rewards_train/margins": -0.14424073696136475, + "rewards_train/rejected": -1.3733141422271729, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -151.90460205078125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -151.44888305664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9904602766036987, + "rewards_train/margins": -0.04557192325592041, + "rewards_train/rejected": -1.9448883533477783, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -108.26695251464844, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -111.67945861816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8766952753067017, + "rewards_train/margins": 0.09125059843063354, + "rewards_train/rejected": -0.9679458737373352, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -112.66931915283203, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -81.45306396484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.066931962966919, + "rewards_train/margins": -0.14662551879882812, + "rewards_train/rejected": -2.920306444168091, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -169.83531188964844, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -175.09324645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6835312843322754, + "rewards_train/margins": 0.62579345703125, + "rewards_train/rejected": -4.309324741363525, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -140.46734619140625, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -107.87419128417969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.696734666824341, + "rewards_train/margins": -0.8593153953552246, + "rewards_train/rejected": -2.837419271469116, + "step": 1388 + }, + { + "epoch": 0.39, + "logps_train/chosen": -44.976593017578125, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -124.9966812133789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7976592779159546, + "rewards_train/margins": 4.627009034156799, + "rewards_train/rejected": -6.424668312072754, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -90.05607604980469, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -88.5309066772461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1556077003479004, + "rewards_train/margins": 2.5974831581115723, + "rewards_train/rejected": -4.753090858459473, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -64.9162368774414, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -166.03660583496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9666237235069275, + "rewards_train/margins": 4.787036955356598, + "rewards_train/rejected": -5.753660678863525, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -16.60627555847168, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -22.697256088256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3731275498867035, + "rewards_train/margins": 0.9715980589389801, + "rewards_train/rejected": -1.3447256088256836, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -23.270183563232422, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -29.118099212646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1332683563232422, + "rewards_train/margins": 0.9347915649414062, + "rewards_train/rejected": -2.0680599212646484, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -15.199865341186523, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -29.573440551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0106115341186523, + "rewards_train/margins": 0.12173259258270264, + "rewards_train/rejected": -1.132344126701355, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -58.02518844604492, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -66.13304901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05251884460449219, + "rewards_train/margins": 1.9607861042022705, + "rewards_train/rejected": -2.0133049488067627, + "step": 1389 + }, + { + "epoch": 0.39, + "logps_train/chosen": -93.85358428955078, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -182.45321655273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2853584289550781, + "rewards_train/margins": 4.309963226318359, + "rewards_train/rejected": -5.5953216552734375, + "step": 1389 + }, + { + "epoch": 0.39, + "learning_rate": 8.97664338229395e-07, + "loss": 0.4049, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -162.506591796875, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -236.75991821289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.000659465789795, + "rewards_train/margins": 4.475332736968994, + "rewards_train/rejected": -8.475992202758789, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -192.9033203125, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -176.633056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.890331983566284, + "rewards_train/margins": 1.922973871231079, + "rewards_train/rejected": -5.813305854797363, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -140.60536193847656, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -172.90359497070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1105363368988037, + "rewards_train/margins": 2.3298232555389404, + "rewards_train/rejected": -5.440359592437744, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -82.56676483154297, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -92.70286560058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9066764712333679, + "rewards_train/margins": 0.9636101126670837, + "rewards_train/rejected": -1.8702865839004517, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -51.198211669921875, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -57.50394821166992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8198211789131165, + "rewards_train/margins": 0.10557365417480469, + "rewards_train/rejected": -0.9253948330879211, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -119.9704360961914, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -172.52622985839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2470436096191406, + "rewards_train/margins": 5.255579471588135, + "rewards_train/rejected": -6.502623081207275, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -42.02060317993164, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -45.86547088623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2645604610443115, + "rewards_train/margins": 0.5282366275787354, + "rewards_train/rejected": -3.792797088623047, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -40.25062942504883, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -104.9568862915039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2375630140304565, + "rewards_train/margins": 4.058125615119934, + "rewards_train/rejected": -5.295688629150391, + "step": 1390 + }, + { + "epoch": 0.39, + "logps_train/chosen": -32.43257141113281, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -96.14567565917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.593257188796997, + "rewards_train/margins": 2.94631028175354, + "rewards_train/rejected": -4.539567470550537, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -25.407588958740234, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -45.93292999267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5407589673995972, + "rewards_train/margins": 2.518159031867981, + "rewards_train/rejected": -4.058917999267578, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.053973197937012, + "logps_train/ref_chosen": -1.0390625, + "logps_train/ref_rejected": -0.71875, + "logps_train/rejected": -4.801114082336426, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5014910697937012, + "rewards_train/margins": -0.09325465559959412, + "rewards_train/rejected": -0.40823641419410706, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -4.414485454559326, + "logps_train/ref_chosen": -0.609375, + "logps_train/ref_rejected": -0.609375, + "logps_train/rejected": -4.321587562561035, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3805110454559326, + "rewards_train/margins": -0.009289771318435669, + "rewards_train/rejected": -0.37122127413749695, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -173.86676025390625, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -185.87042236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.486675977706909, + "rewards_train/margins": 1.1003663539886475, + "rewards_train/rejected": -4.587042331695557, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -213.96531677246094, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -223.88760375976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.896531581878662, + "rewards_train/margins": 0.1922287940979004, + "rewards_train/rejected": -5.0887603759765625, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -213.3701171875, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -158.7025146484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.337011814117432, + "rewards_train/margins": -1.6667602062225342, + "rewards_train/rejected": -3.6702516078948975, + "step": 1391 + }, + { + "epoch": 0.39, + "logps_train/chosen": -47.748809814453125, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -71.10335540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6498810052871704, + "rewards_train/margins": 2.2854546308517456, + "rewards_train/rejected": -2.935335636138916, + "step": 1391 + }, + { + "epoch": 0.39, + "learning_rate": 8.950330370576415e-07, + "loss": 0.3803, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -186.6953887939453, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -185.31930541992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.069538950920105, + "rewards_train/margins": 3.962391495704651, + "rewards_train/rejected": -5.031930446624756, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -29.30817222595215, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -43.63016891479492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.666754722595215, + "rewards_train/margins": 0.30251216888427734, + "rewards_train/rejected": -2.969266891479492, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -51.039039611816406, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -64.58487701416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.141403913497925, + "rewards_train/margins": 1.3920838832855225, + "rewards_train/rejected": -4.533487796783447, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -14.970123291015625, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -26.32045555114746, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09673767536878586, + "rewards_train/margins": 1.203783206641674, + "rewards_train/rejected": -1.1070455312728882, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -2.634098529815674, + "logps_train/ref_chosen": -0.328125, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -17.31533432006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2305973619222641, + "rewards_train/margins": 1.133748546242714, + "rewards_train/rejected": -1.364345908164978, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -4.380508899688721, + "logps_train/ref_chosen": -1.4921875, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -15.811795234680176, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2888321578502655, + "rewards_train/margins": 0.7298473417758942, + "rewards_train/rejected": -1.0186794996261597, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -187.87362670898438, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -132.716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.787362575531006, + "rewards_train/margins": 0.4843173027038574, + "rewards_train/rejected": -5.271679878234863, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -148.28851318359375, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -165.52220153808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.17885160446167, + "rewards_train/margins": 2.9733686447143555, + "rewards_train/rejected": -7.152220249176025, + "step": 1392 + }, + { + "epoch": 0.39, + "logps_train/chosen": -52.96551513671875, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -95.15667724609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0965516567230225, + "rewards_train/margins": -1.4308838844299316, + "rewards_train/rejected": -1.6656677722930908, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -24.987503051757812, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -46.12821960449219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9862503409385681, + "rewards_train/margins": -0.2484283447265625, + "rewards_train/rejected": -0.7378219962120056, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -35.210548400878906, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -57.07996368408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0335549116134644, + "rewards_train/margins": 0.7994414567947388, + "rewards_train/rejected": -1.8329963684082031, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -116.80712127685547, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -104.13460540771484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.130712032318115, + "rewards_train/margins": -1.9672515392303467, + "rewards_train/rejected": -2.1634604930877686, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -4.467861175537109, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -6.970477104187012, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11553611606359482, + "rewards_train/margins": 0.015886597335338593, + "rewards_train/rejected": -0.1314227133989334, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -35.515968322753906, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -46.30051803588867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6765968799591064, + "rewards_train/margins": 0.9409549236297607, + "rewards_train/rejected": -3.617551803588867, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -65.88859558105469, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -65.70570373535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23885956406593323, + "rewards_train/margins": -0.018289193511009216, + "rewards_train/rejected": -0.220570370554924, + "step": 1393 + }, + { + "epoch": 0.39, + "logps_train/chosen": -63.0206298828125, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -106.68118286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05206298828125, + "rewards_train/margins": 0.9660552740097046, + "rewards_train/rejected": -1.0181182622909546, + "step": 1393 + }, + { + "epoch": 0.39, + "learning_rate": 8.924024705433382e-07, + "loss": 0.5774, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -162.39874267578125, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -202.96615600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2398743629455566, + "rewards_train/margins": 0.35674118995666504, + "rewards_train/rejected": -3.5966155529022217, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -21.56427001953125, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -32.093238830566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8642395734786987, + "rewards_train/margins": 0.6669594049453735, + "rewards_train/rejected": -2.5311989784240723, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -156.0114288330078, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -167.4327392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.801143169403076, + "rewards_train/margins": 1.5921306610107422, + "rewards_train/rejected": -7.393273830413818, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -10.149911880493164, + "logps_train/ref_chosen": -1.828125, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -32.8951301574707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8321787118911743, + "rewards_train/margins": 1.213584303855896, + "rewards_train/rejected": -2.0457630157470703, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -111.0044937133789, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -89.40179443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5995506644248962, + "rewards_train/margins": 0.3397301137447357, + "rewards_train/rejected": 0.2598205506801605, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -62.4481315612793, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -29.375, + "logps_train/rejected": -78.37466430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3573131561279297, + "rewards_train/margins": 1.5426535606384277, + "rewards_train/rejected": -4.899966716766357, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -27.28103256225586, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -42.10219192504883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.41247820854187, + "rewards_train/margins": -0.6272590160369873, + "rewards_train/rejected": -1.7852191925048828, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -88.56079864501953, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -116.93891906738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.70607990026474, + "rewards_train/margins": 3.037812054157257, + "rewards_train/rejected": -3.743891954421997, + "step": 1394 + }, + { + "epoch": 0.39, + "logps_train/chosen": -154.48876953125, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -75.5921859741211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.748877048492432, + "rewards_train/margins": -1.464658260345459, + "rewards_train/rejected": -4.284218788146973, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -85.50250244140625, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -119.28598022460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04974975809454918, + "rewards_train/margins": 5.128347780555487, + "rewards_train/rejected": -5.0785980224609375, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -197.10833740234375, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -203.5737762451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.510833740234375, + "rewards_train/margins": 2.6465439796447754, + "rewards_train/rejected": -4.15737771987915, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -23.39592170715332, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -19.80511474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.514592170715332, + "rewards_train/margins": 1.0768568515777588, + "rewards_train/rejected": -1.5914490222930908, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -116.7538070678711, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -197.95140075683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4253807067871094, + "rewards_train/margins": 7.019759178161621, + "rewards_train/rejected": -10.44513988494873, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -105.0610122680664, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -122.10922241210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8561012744903564, + "rewards_train/margins": 4.029821157455444, + "rewards_train/rejected": -5.885922431945801, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -19.032718658447266, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -31.017078399658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7532718777656555, + "rewards_train/margins": 1.8984360098838806, + "rewards_train/rejected": -2.651707887649536, + "step": 1395 + }, + { + "epoch": 0.39, + "logps_train/chosen": -34.150978088378906, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -55.74081802368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7650978565216064, + "rewards_train/margins": 1.7339839935302734, + "rewards_train/rejected": -3.49908185005188, + "step": 1395 + }, + { + "epoch": 0.39, + "learning_rate": 8.89772657097662e-07, + "loss": 0.3504, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -2.9462358951568604, + "logps_train/ref_chosen": -0.62109375, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -19.532421112060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23251421749591827, + "rewards_train/margins": 1.0551028698682785, + "rewards_train/rejected": -1.2876170873641968, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -123.221435546875, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -158.4026336669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4721435606479645, + "rewards_train/margins": 4.318120092153549, + "rewards_train/rejected": -4.790263652801514, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -126.13398742675781, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -194.23306274414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8133987188339233, + "rewards_train/margins": 4.109907746315002, + "rewards_train/rejected": -5.923306465148926, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -18.647781372070312, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -26.583749771118164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3835281431674957, + "rewards_train/margins": 1.1310968101024628, + "rewards_train/rejected": -1.5146249532699585, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -76.77793884277344, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -131.0395965576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9277938604354858, + "rewards_train/margins": 1.626165747642517, + "rewards_train/rejected": -3.553959608078003, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -19.33493423461914, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -20.448007583618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.670993447303772, + "rewards_train/margins": 1.1472448110580444, + "rewards_train/rejected": -1.8182382583618164, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -119.62843322753906, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -165.4375762939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.86284339427948, + "rewards_train/margins": 2.2309142351150513, + "rewards_train/rejected": -4.093757629394531, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -17.567277908325195, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -17.782560348510742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3536027669906616, + "rewards_train/margins": 0.021528244018554688, + "rewards_train/rejected": -1.3751310110092163, + "step": 1396 + }, + { + "epoch": 0.39, + "logps_train/chosen": -17.49188995361328, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -30.68075942993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.239814043045044, + "rewards_train/margins": 0.7095118761062622, + "rewards_train/rejected": -1.9493259191513062, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -86.55972290039062, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -129.99282836914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0309722423553467, + "rewards_train/margins": 0.518310546875, + "rewards_train/rejected": -3.5492827892303467, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -105.28404998779297, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -105.35629272460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.028404999524354935, + "rewards_train/margins": 0.007224272936582565, + "rewards_train/rejected": -0.0356292724609375, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -59.21278381347656, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -24.11918830871582, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6337783336639404, + "rewards_train/margins": -1.4202969074249268, + "rewards_train/rejected": -2.2134814262390137, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -251.0855255126953, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -189.36317443847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.108552932739258, + "rewards_train/margins": -2.322235107421875, + "rewards_train/rejected": -9.786317825317383, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -134.41415405273438, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -187.43719482421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9414154291152954, + "rewards_train/margins": 5.752303957939148, + "rewards_train/rejected": -6.693719387054443, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -1.815234661102295, + "logps_train/ref_chosen": -0.765625, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -7.250638961791992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10496097058057785, + "rewards_train/margins": 0.20135293155908585, + "rewards_train/rejected": -0.3063139021396637, + "step": 1397 + }, + { + "epoch": 0.39, + "logps_train/chosen": -157.54837036132812, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -202.5572509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.454837322235107, + "rewards_train/margins": 2.3008880615234375, + "rewards_train/rejected": -6.755725383758545, + "step": 1397 + }, + { + "epoch": 0.39, + "learning_rate": 8.871436151265182e-07, + "loss": 0.5096, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -113.36563110351562, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -171.84703063964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0865631103515625, + "rewards_train/margins": 3.4981400966644287, + "rewards_train/rejected": -3.584703207015991, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -145.64663696289062, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -236.56260681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.314663887023926, + "rewards_train/margins": 4.7415971755981445, + "rewards_train/rejected": -10.05626106262207, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -29.48777961730957, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -40.981964111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.530027985572815, + "rewards_train/margins": 1.6556683778762817, + "rewards_train/rejected": -3.1856963634490967, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -34.0715446472168, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -19.54194450378418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3634045124053955, + "rewards_train/margins": -2.1217100620269775, + "rewards_train/rejected": -0.24169445037841797, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -131.07339477539062, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -128.12969970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9573394656181335, + "rewards_train/margins": 0.3556305766105652, + "rewards_train/rejected": -1.3129700422286987, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -104.18836975097656, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -86.5025863647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2688369750976562, + "rewards_train/margins": 0.8814218044281006, + "rewards_train/rejected": -2.150258779525757, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -62.29657745361328, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -34.02855682373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07034225761890411, + "rewards_train/margins": 1.8481980115175247, + "rewards_train/rejected": -1.7778557538986206, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -144.18948364257812, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -143.6868896484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.918948650360107, + "rewards_train/margins": -0.10025978088378906, + "rewards_train/rejected": -4.818688869476318, + "step": 1398 + }, + { + "epoch": 0.39, + "logps_train/chosen": -25.918561935424805, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -32.731956481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1543562412261963, + "rewards_train/margins": 0.4063394069671631, + "rewards_train/rejected": -1.5606956481933594, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -20.051469802856445, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -20.10462760925293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04264698177576065, + "rewards_train/margins": 1.1615658029913902, + "rewards_train/rejected": -1.2042127847671509, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -34.782047271728516, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -76.09780883789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2907047271728516, + "rewards_train/margins": 4.256576061248779, + "rewards_train/rejected": -5.547280788421631, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -74.01594543457031, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -140.68284606933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3484054505825043, + "rewards_train/margins": 2.9666901528835297, + "rewards_train/rejected": -2.6182847023010254, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -30.210580825805664, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -79.69849395751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7835581302642822, + "rewards_train/margins": 1.9112913608551025, + "rewards_train/rejected": -3.6948494911193848, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -20.195472717285156, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -36.68869400024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1695473194122314, + "rewards_train/margins": 1.4118220806121826, + "rewards_train/rejected": -2.581369400024414, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -214.04342651367188, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -214.80950927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4043426513671875, + "rewards_train/margins": 2.1766085624694824, + "rewards_train/rejected": -5.58095121383667, + "step": 1399 + }, + { + "epoch": 0.39, + "logps_train/chosen": -159.2178955078125, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -176.17747497558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.671789646148682, + "rewards_train/margins": -2.054041862487793, + "rewards_train/rejected": -4.617747783660889, + "step": 1399 + }, + { + "epoch": 0.39, + "learning_rate": 8.845153630304139e-07, + "loss": 0.4814, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.959278106689453, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -26.206308364868164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3084278106689453, + "rewards_train/margins": -0.08779697120189667, + "rewards_train/rejected": -0.22063083946704865, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -108.30348205566406, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -118.80279541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8803482055664062, + "rewards_train/margins": 1.6999313831329346, + "rewards_train/rejected": -2.580279588699341, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -219.78955078125, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -220.49258422851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.078955173492432, + "rewards_train/margins": 0.07030344009399414, + "rewards_train/rejected": -6.149258613586426, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -21.33388328552246, + "logps_train/ref_chosen": -3.109375, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -26.73935317993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.822450876235962, + "rewards_train/margins": 0.3796095848083496, + "rewards_train/rejected": -2.2020604610443115, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -150.40525817871094, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -194.0, + "logps_train/rejected": -297.54742431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.740525960922241, + "rewards_train/margins": 6.614217042922974, + "rewards_train/rejected": -10.354743003845215, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -1.4292755126953125, + "logps_train/ref_chosen": -0.5390625, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -7.254379749298096, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08902130275964737, + "rewards_train/margins": 0.28329169005155563, + "rewards_train/rejected": -0.372312992811203, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -11.298524856567383, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -14.076120376586914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20485249161720276, + "rewards_train/margins": 0.5996345579624176, + "rewards_train/rejected": -0.8044870495796204, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -26.36380386352539, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -18.469654083251953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6488804221153259, + "rewards_train/margins": -0.5394150093197823, + "rewards_train/rejected": -0.10946541279554367, + "step": 1400 + }, + { + "epoch": 0.39, + "logps_train/chosen": -37.389137268066406, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -27.892436981201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3389137983322144, + "rewards_train/margins": 1.0065799951553345, + "rewards_train/rejected": -2.345493793487549, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -164.39556884765625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -199.61624145507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2395570278167725, + "rewards_train/margins": 1.6220672130584717, + "rewards_train/rejected": -3.861624240875244, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -68.36836242675781, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -56.1533203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9868362545967102, + "rewards_train/margins": -0.12150418758392334, + "rewards_train/rejected": -0.8653320670127869, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -44.89423370361328, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -58.04636764526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5019233226776123, + "rewards_train/margins": 0.49021339416503906, + "rewards_train/rejected": -2.9921367168426514, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -101.20230102539062, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -88.370361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.395230293273926, + "rewards_train/margins": 1.7668061256408691, + "rewards_train/rejected": -7.162036418914795, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -28.282922744750977, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -56.404632568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3095422983169556, + "rewards_train/margins": 0.455920934677124, + "rewards_train/rejected": -1.7654632329940796, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -8.535897254943848, + "logps_train/ref_chosen": -0.625, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -7.860559463500977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7910897135734558, + "rewards_train/margins": -0.37534627318382263, + "rewards_train/rejected": -0.4157434403896332, + "step": 1401 + }, + { + "epoch": 0.39, + "logps_train/chosen": -155.4048614501953, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -236.1227264404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.490486145019531, + "rewards_train/margins": 4.621787071228027, + "rewards_train/rejected": -9.112273216247559, + "step": 1401 + }, + { + "epoch": 0.39, + "learning_rate": 8.818879192043269e-07, + "loss": 0.4604, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -80.18922424316406, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -79.51768493652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4189224243164062, + "rewards_train/margins": -0.0671539306640625, + "rewards_train/rejected": -1.3517684936523438, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -0.09702272713184357, + "logps_train/ref_chosen": -0.99609375, + "logps_train/ref_rejected": -1.2421875, + "logps_train/rejected": -2.7492258548736572, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08990710228681564, + "rewards_train/margins": 0.24061093479394913, + "rewards_train/rejected": -0.15070383250713348, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -149.59136962890625, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -127.7385025024414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.659137010574341, + "rewards_train/margins": -0.385286808013916, + "rewards_train/rejected": -3.273850202560425, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -131.6002197265625, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -109.96471405029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1600220203399658, + "rewards_train/margins": 0.5364494323730469, + "rewards_train/rejected": -1.6964714527130127, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -26.932697296142578, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -14.03407096862793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4245197772979736, + "rewards_train/margins": -0.45861268043518066, + "rewards_train/rejected": -0.965907096862793, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -9.762348175048828, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -14.517675399780273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6824848055839539, + "rewards_train/margins": 0.27240777015686035, + "rewards_train/rejected": -0.9548925757408142, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -21.31089973449707, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -45.55147933959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0685900449752808, + "rewards_train/margins": 2.074057936668396, + "rewards_train/rejected": -3.1426479816436768, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -28.143898010253906, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -19.49786949157715, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.155014753341675, + "rewards_train/margins": -0.6583527326583862, + "rewards_train/rejected": -1.4966620206832886, + "step": 1402 + }, + { + "epoch": 0.39, + "logps_train/chosen": -45.76137924194336, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -61.50370788574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1011379957199097, + "rewards_train/margins": 3.9492326974868774, + "rewards_train/rejected": -5.050370693206787, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -222.03466796875, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -157.76580810546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.503467082977295, + "rewards_train/margins": -1.8768863677978516, + "rewards_train/rejected": -5.626580715179443, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -13.145990371704102, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -42.27854919433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46459904313087463, + "rewards_train/margins": 1.8007558286190033, + "rewards_train/rejected": -2.265354871749878, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -100.26720428466797, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -206.90155029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6767204403877258, + "rewards_train/margins": 4.3134347796440125, + "rewards_train/rejected": -4.990155220031738, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -29.843908309936523, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -35.58625030517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.443765878677368, + "rewards_train/margins": 0.5711092948913574, + "rewards_train/rejected": -3.0148751735687256, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -32.81528854370117, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -73.79110717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018471146002411842, + "rewards_train/margins": 3.722582006826997, + "rewards_train/rejected": -3.704110860824585, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -115.00043487548828, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -158.30450439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2000434398651123, + "rewards_train/margins": 3.3804070949554443, + "rewards_train/rejected": -6.580450534820557, + "step": 1403 + }, + { + "epoch": 0.39, + "logps_train/chosen": -61.890567779541016, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -168.49880981445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010943221859633923, + "rewards_train/margins": 5.710824298672378, + "rewards_train/rejected": -5.699881076812744, + "step": 1403 + }, + { + "epoch": 0.39, + "learning_rate": 8.792613020375782e-07, + "loss": 0.5059, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -4.852794170379639, + "logps_train/ref_chosen": -1.78125, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -40.2954216003418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30715441703796387, + "rewards_train/margins": 2.6723878383636475, + "rewards_train/rejected": -2.9795422554016113, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -139.7935791015625, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -58.47472381591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.729358196258545, + "rewards_train/margins": -0.7006359100341797, + "rewards_train/rejected": -5.028722286224365, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -22.738018035888672, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -42.47235107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44880181550979614, + "rewards_train/margins": 2.298433244228363, + "rewards_train/rejected": -2.747235059738159, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -114.79946899414062, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -106.7848129272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.329946994781494, + "rewards_train/margins": 0.09853434562683105, + "rewards_train/rejected": -3.428481340408325, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -7.239150047302246, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -1.7578125, + "logps_train/rejected": -6.203919887542725, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36922749876976013, + "rewards_train/margins": 0.0753832459449768, + "rewards_train/rejected": -0.44461074471473694, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -42.38108444213867, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -41.96100616455078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.863108515739441, + "rewards_train/margins": -0.09200787544250488, + "rewards_train/rejected": -1.771100640296936, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -12.561027526855469, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -35.076602935791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6404777765274048, + "rewards_train/margins": 2.1046825647354126, + "rewards_train/rejected": -2.7451603412628174, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -236.66165161132812, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -215.2491912841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.766165256500244, + "rewards_train/margins": 0.9587540626525879, + "rewards_train/rejected": -6.724919319152832, + "step": 1404 + }, + { + "epoch": 0.39, + "logps_train/chosen": -156.28298950195312, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -202.81387329101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7282989025115967, + "rewards_train/margins": 3.4530885219573975, + "rewards_train/rejected": -7.181387424468994, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -76.15045166015625, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -165.93124389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.515045166015625, + "rewards_train/margins": 4.128079414367676, + "rewards_train/rejected": -7.643124580383301, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -9.613088607788086, + "logps_train/ref_chosen": -1.109375, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -14.477017402648926, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8503713607788086, + "rewards_train/margins": -0.8714196216315031, + "rewards_train/rejected": 0.02104826085269451, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -131.1324462890625, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -108.46561431884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.863244533538818, + "rewards_train/margins": 0.4583168029785156, + "rewards_train/rejected": -5.321561336517334, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -42.69709777832031, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -42.24824523925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.332209825515747, + "rewards_train/margins": -0.08238530158996582, + "rewards_train/rejected": -3.2498245239257812, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -11.833002090454102, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -24.644399642944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6895502209663391, + "rewards_train/margins": 1.4592646956443787, + "rewards_train/rejected": -2.1488149166107178, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -3.82417893409729, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -0.4890121817588806, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.173042893409729, + "rewards_train/margins": -0.32257917523384094, + "rewards_train/rejected": 0.14953628182411194, + "step": 1405 + }, + { + "epoch": 0.39, + "logps_train/chosen": -210.25732421875, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -231.1572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.225732326507568, + "rewards_train/margins": 1.7899909019470215, + "rewards_train/rejected": -9.01572322845459, + "step": 1405 + }, + { + "epoch": 0.39, + "learning_rate": 8.766355299137028e-07, + "loss": 0.4661, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -87.64234924316406, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -144.99850463867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.264235019683838, + "rewards_train/margins": 2.085615634918213, + "rewards_train/rejected": -4.349850654602051, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -8.153602600097656, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -20.578838348388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19973526895046234, + "rewards_train/margins": 0.9206485897302628, + "rewards_train/rejected": -1.120383858680725, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -0.02056615985929966, + "logps_train/ref_chosen": -0.259765625, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -4.395079135894775, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02391994558274746, + "rewards_train/margins": 0.23842786811292171, + "rewards_train/rejected": -0.21450792253017426, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -126.47693634033203, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -155.3973846435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2976937294006348, + "rewards_train/margins": 3.0920448303222656, + "rewards_train/rejected": -5.3897385597229, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -50.33964538574219, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -105.69224548339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6839645504951477, + "rewards_train/margins": -0.11474001407623291, + "rewards_train/rejected": -0.5692245364189148, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.170263767242432, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -6.169148921966553, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029526377096772194, + "rewards_train/margins": 0.03113851509988308, + "rewards_train/rejected": -0.06066489219665527, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -19.338022232055664, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -11.35528564453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8275522589683533, + "rewards_train/margins": -0.25452369451522827, + "rewards_train/rejected": -0.573028564453125, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -121.39744567871094, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -132.56918334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6897445917129517, + "rewards_train/margins": 2.8671740293502808, + "rewards_train/rejected": -4.556918621063232, + "step": 1406 + }, + { + "epoch": 0.39, + "logps_train/chosen": -23.495540618896484, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -18.722461700439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1558040380477905, + "rewards_train/margins": 0.14769220352172852, + "rewards_train/rejected": -1.303496241569519, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -83.13294219970703, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -85.336669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5632942318916321, + "rewards_train/margins": 0.17037278413772583, + "rewards_train/rejected": -0.7336670160293579, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -2.787879705429077, + "logps_train/ref_chosen": -0.859375, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -12.826558113098145, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19285047054290771, + "rewards_train/margins": 0.7148053646087646, + "rewards_train/rejected": -0.9076558351516724, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -6.804392337799072, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -108.31037902832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35856422781944275, + "rewards_train/margins": 0.9724736511707306, + "rewards_train/rejected": -1.3310378789901733, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -19.440563201904297, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -27.939964294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4690563380718231, + "rewards_train/margins": 0.74369016289711, + "rewards_train/rejected": -1.212746500968933, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -32.629127502441406, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -71.82865905761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8066627979278564, + "rewards_train/margins": 2.1762032508850098, + "rewards_train/rejected": -3.982866048812866, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -170.92138671875, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -201.49412536621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.792138695716858, + "rewards_train/margins": 3.8572739362716675, + "rewards_train/rejected": -5.649412631988525, + "step": 1407 + }, + { + "epoch": 0.39, + "logps_train/chosen": -106.73945617675781, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -82.38369750976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7239456176757812, + "rewards_train/margins": 1.0144240856170654, + "rewards_train/rejected": -2.7383697032928467, + "step": 1407 + }, + { + "epoch": 0.39, + "learning_rate": 8.740106212103221e-07, + "loss": 0.3861, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -27.77120018005371, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -32.51581954956055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2708700895309448, + "rewards_train/margins": 0.33071184158325195, + "rewards_train/rejected": -1.6015819311141968, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -106.23445129394531, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -107.14108276367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9734451174736023, + "rewards_train/margins": 1.0406633019447327, + "rewards_train/rejected": -2.014108419418335, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -259.0321044921875, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -290.10943603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.703210830688477, + "rewards_train/margins": 2.1077327728271484, + "rewards_train/rejected": -10.810943603515625, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -26.768028259277344, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -37.38301467895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3158652782440186, + "rewards_train/margins": 0.8161861896514893, + "rewards_train/rejected": -3.132051467895508, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -162.71746826171875, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -131.66900634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.371747016906738, + "rewards_train/margins": 1.1951537132263184, + "rewards_train/rejected": -6.566900730133057, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -143.0970458984375, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -114.70611572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.8902954459190369, + "rewards_train/margins": 4.210907161235809, + "rewards_train/rejected": -3.3206117153167725, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -14.142167091369629, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -44.18508529663086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6360917091369629, + "rewards_train/margins": 1.132416844367981, + "rewards_train/rejected": -1.7685085535049438, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -50.34648132324219, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -91.65940856933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4346481263637543, + "rewards_train/margins": 1.3312927782535553, + "rewards_train/rejected": -1.7659409046173096, + "step": 1408 + }, + { + "epoch": 0.39, + "logps_train/chosen": -0.498718798160553, + "logps_train/ref_chosen": -1.0, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -27.071563720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05012812092900276, + "rewards_train/margins": 1.8385344929993153, + "rewards_train/rejected": -1.7884063720703125, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -11.561217308044434, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -23.168476104736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4592467248439789, + "rewards_train/margins": 0.926350861787796, + "rewards_train/rejected": -1.385597586631775, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -99.43450164794922, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -51.77020263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2934502363204956, + "rewards_train/margins": 1.7210701704025269, + "rewards_train/rejected": -3.0145204067230225, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -111.47900390625, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -96.46829986572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.547900438308716, + "rewards_train/margins": 0.9489295482635498, + "rewards_train/rejected": -4.496829986572266, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -163.65618896484375, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -256.05987548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.86561918258667, + "rewards_train/margins": 6.340368747711182, + "rewards_train/rejected": -13.205987930297852, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -161.89256286621094, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -251.69064331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7892563343048096, + "rewards_train/margins": 4.079807996749878, + "rewards_train/rejected": -7.8690643310546875, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -94.25038146972656, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -95.10221862792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4750381708145142, + "rewards_train/margins": 0.08518373966217041, + "rewards_train/rejected": -1.5602219104766846, + "step": 1409 + }, + { + "epoch": 0.39, + "logps_train/chosen": -60.0017204284668, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -146.65907287597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7501720190048218, + "rewards_train/margins": -0.2842646837234497, + "rewards_train/rejected": -1.465907335281372, + "step": 1409 + }, + { + "epoch": 0.39, + "learning_rate": 8.713865942990141e-07, + "loss": 0.2878, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -9.78762435913086, + "logps_train/ref_chosen": -1.3203125, + "logps_train/ref_rejected": -1.3203125, + "logps_train/rejected": -9.866289138793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8467311859130859, + "rewards_train/margins": 0.007866501808166504, + "rewards_train/rejected": -0.8545976877212524, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -11.167686462402344, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -12.029936790466309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7495811581611633, + "rewards_train/margins": 0.2502875328063965, + "rewards_train/rejected": -0.9998686909675598, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -50.011837005615234, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -45.909507751464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2761837244033813, + "rewards_train/margins": 1.6522670984268188, + "rewards_train/rejected": -2.9284508228302, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -199.76364135742188, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -194.69398498535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.876364231109619, + "rewards_train/margins": -0.6069655418395996, + "rewards_train/rejected": -5.2693986892700195, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -17.15512466430664, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -34.02894973754883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05301246792078018, + "rewards_train/margins": 2.649882458150387, + "rewards_train/rejected": -2.702894926071167, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -24.431907653808594, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -51.317848205566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7181907892227173, + "rewards_train/margins": 1.6135941743850708, + "rewards_train/rejected": -2.331784963607788, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -176.50779724121094, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -148.56854248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.050779819488525, + "rewards_train/margins": 0.6560745239257812, + "rewards_train/rejected": -4.706854343414307, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -128.080810546875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -165.57440185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.508081078529358, + "rewards_train/margins": 2.599359393119812, + "rewards_train/rejected": -4.10744047164917, + "step": 1410 + }, + { + "epoch": 0.39, + "logps_train/chosen": -65.18973541259766, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -81.04935455322266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.74397349357605, + "rewards_train/margins": -0.6140379905700684, + "rewards_train/rejected": -3.1299355030059814, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -10.628737449645996, + "logps_train/ref_chosen": -0.65625, + "logps_train/ref_rejected": -1.28125, + "logps_train/rejected": -13.132052421569824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9972487688064575, + "rewards_train/margins": 0.18783152103424072, + "rewards_train/rejected": -1.1850802898406982, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -65.27157592773438, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -84.99635314941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15215758979320526, + "rewards_train/margins": -0.7025222629308701, + "rewards_train/rejected": 0.5503646731376648, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -110.62422943115234, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -176.5554962158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.56242299079895, + "rewards_train/margins": 5.693126440048218, + "rewards_train/rejected": -8.255549430847168, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -26.010068893432617, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -26.731233596801758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4760068953037262, + "rewards_train/margins": 0.3846164643764496, + "rewards_train/rejected": -0.8606233596801758, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -14.194387435913086, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -41.16511535644531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5756887793540955, + "rewards_train/margins": -0.03417724370956421, + "rewards_train/rejected": -0.5415115356445312, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -46.97755813598633, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -49.158111572265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8102558851242065, + "rewards_train/margins": -0.24444472789764404, + "rewards_train/rejected": -1.5658111572265625, + "step": 1411 + }, + { + "epoch": 0.39, + "logps_train/chosen": -103.17239379882812, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -152.45123291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1672394275665283, + "rewards_train/margins": 1.3278839588165283, + "rewards_train/rejected": -4.495123386383057, + "step": 1411 + }, + { + "epoch": 0.39, + "learning_rate": 8.687634675451844e-07, + "loss": 0.5165, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -193.27719116210938, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -190.3609619140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.827719211578369, + "rewards_train/margins": -3.891623020172119, + "rewards_train/rejected": -1.93609619140625, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -12.4447021484375, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -38.45879364013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29302978515625, + "rewards_train/margins": 0.5139091461896896, + "rewards_train/rejected": -0.22087936103343964, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -131.30967712402344, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -233.66726684570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.080967664718628, + "rewards_train/margins": 7.985759019851685, + "rewards_train/rejected": -10.066726684570312, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -22.649250030517578, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -24.19463348388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4774249792099, + "rewards_train/margins": 0.24203836917877197, + "rewards_train/rejected": -1.7194633483886719, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -154.92800903320312, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -236.50759887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.142800807952881, + "rewards_train/margins": 3.907959461212158, + "rewards_train/rejected": -9.050760269165039, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -111.94727325439453, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -104.33757019042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.055272676050662994, + "rewards_train/margins": 1.0890297666192055, + "rewards_train/rejected": -1.0337570905685425, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -99.04743194580078, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -2.375, + "logps_train/rejected": -8.446849822998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2952568233013153, + "rewards_train/margins": 0.9024418294429779, + "rewards_train/rejected": -0.6071850061416626, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -21.093639373779297, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -32.58613586425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9093639254570007, + "rewards_train/margins": 0.4492496848106384, + "rewards_train/rejected": -1.3586136102676392, + "step": 1412 + }, + { + "epoch": 0.39, + "logps_train/chosen": -23.788393020629883, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -65.16921997070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.47883930802345276, + "rewards_train/margins": -0.261917307972908, + "rewards_train/rejected": -0.21692200005054474, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -100.45332336425781, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -140.86685180664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8953323364257812, + "rewards_train/margins": 2.091352939605713, + "rewards_train/rejected": -3.986685276031494, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -146.6864471435547, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -138.80665588378906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4686447381973267, + "rewards_train/margins": -0.2879791259765625, + "rewards_train/rejected": -1.1806656122207642, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -134.2161102294922, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -174.58706665039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.071610927581787, + "rewards_train/margins": 0.737095832824707, + "rewards_train/rejected": -4.808706760406494, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -121.33041381835938, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -96.61213684082031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6830413341522217, + "rewards_train/margins": -2.0718276500701904, + "rewards_train/rejected": -1.6112136840820312, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -4.151193141937256, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -16.851232528686523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16824431717395782, + "rewards_train/margins": 1.0450039356946945, + "rewards_train/rejected": -1.2132482528686523, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -106.22984313964844, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -129.39764404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4229843616485596, + "rewards_train/margins": 4.016780138015747, + "rewards_train/rejected": -5.439764499664307, + "step": 1413 + }, + { + "epoch": 0.39, + "logps_train/chosen": -48.53966522216797, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -106.83944702148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0289666652679443, + "rewards_train/margins": 1.9049780368804932, + "rewards_train/rejected": -3.9339447021484375, + "step": 1413 + }, + { + "epoch": 0.4, + "learning_rate": 8.661412593079397e-07, + "loss": 0.684, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -120.15166473388672, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -165.34584045410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4151666164398193, + "rewards_train/margins": 4.119417428970337, + "rewards_train/rejected": -6.534584045410156, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -11.716623306274414, + "logps_train/ref_chosen": -0.70703125, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -12.747037887573242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1009591817855835, + "rewards_train/margins": -0.13563036918640137, + "rewards_train/rejected": -0.9653288125991821, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -15.684440612792969, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -43.22629928588867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.191881537437439, + "rewards_train/margins": 2.5994983911514282, + "rewards_train/rejected": -3.791379928588867, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -247.36563110351562, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -58.084590911865234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.136563301086426, + "rewards_train/margins": -9.10310411453247, + "rewards_train/rejected": -3.033459186553955, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -98.82441711425781, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -114.0649642944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2324416637420654, + "rewards_train/margins": 0.47405481338500977, + "rewards_train/rejected": -2.706496477127075, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -16.631778717041016, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -48.83751678466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22567787766456604, + "rewards_train/margins": 3.1955738961696625, + "rewards_train/rejected": -3.4212517738342285, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -81.77777099609375, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -81.6953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22777710855007172, + "rewards_train/margins": -0.008245855569839478, + "rewards_train/rejected": -0.21953125298023224, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -18.142711639404297, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -24.162776947021484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6892711520195007, + "rewards_train/margins": 0.6957566142082214, + "rewards_train/rejected": -1.3850277662277222, + "step": 1414 + }, + { + "epoch": 0.4, + "logps_train/chosen": -189.63101196289062, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -194.74769592285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.013101577758789, + "rewards_train/margins": 0.2116680145263672, + "rewards_train/rejected": -8.224769592285156, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -107.13133239746094, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -186.7078857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0631332397460938, + "rewards_train/margins": 5.907655715942383, + "rewards_train/rejected": -8.970788955688477, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -51.03337860107422, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -36.36455535888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42833787202835083, + "rewards_train/margins": 1.3831177353858948, + "rewards_train/rejected": -1.8114556074142456, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -147.89198303222656, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -165.70730590820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9891984462738037, + "rewards_train/margins": 0.33153223991394043, + "rewards_train/rejected": -4.320730686187744, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -20.892974853515625, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -36.08946990966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8064850568771362, + "rewards_train/margins": 0.8149620294570923, + "rewards_train/rejected": -2.6214470863342285, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -31.357940673828125, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -55.891082763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.135794162750244, + "rewards_train/margins": 2.022064208984375, + "rewards_train/rejected": -4.157858371734619, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -150.92352294921875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -213.2438201904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7923524379730225, + "rewards_train/margins": 4.032029867172241, + "rewards_train/rejected": -7.824382305145264, + "step": 1415 + }, + { + "epoch": 0.4, + "logps_train/chosen": -2.4922847747802734, + "logps_train/ref_chosen": -0.41796875, + "logps_train/ref_rejected": -0.41796875, + "logps_train/rejected": -2.384618043899536, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2074315994977951, + "rewards_train/margins": -0.010766670107841492, + "rewards_train/rejected": -0.1966649293899536, + "step": 1415 + }, + { + "epoch": 0.4, + "learning_rate": 8.635199879399575e-07, + "loss": 0.8858, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -18.626625061035156, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -21.767099380493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9689125418663025, + "rewards_train/margins": 0.19529742002487183, + "rewards_train/rejected": -1.1642099618911743, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -98.29383850097656, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -108.68441009521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22938385605812073, + "rewards_train/margins": 0.7890572249889374, + "rewards_train/rejected": -1.018441081047058, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -151.9281768798828, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -196.78814697265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.642817974090576, + "rewards_train/margins": -0.4640030860900879, + "rewards_train/rejected": -5.178814888000488, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -146.826904296875, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -286.5328369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.032690525054932, + "rewards_train/margins": 9.020593166351318, + "rewards_train/rejected": -13.05328369140625, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -103.52496337890625, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -179.27520751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20249633491039276, + "rewards_train/margins": 1.3250244408845901, + "rewards_train/rejected": -1.527520775794983, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -119.58943176269531, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -32.04255294799805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.208943173289299, + "rewards_train/margins": 1.9890622645616531, + "rewards_train/rejected": -2.198005437850952, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -138.11187744140625, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -186.82394409179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4611878395080566, + "rewards_train/margins": 3.871206760406494, + "rewards_train/rejected": -6.332394599914551, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -26.5544376373291, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -24.353355407714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7929437756538391, + "rewards_train/margins": -0.19510823488235474, + "rewards_train/rejected": -0.5978355407714844, + "step": 1416 + }, + { + "epoch": 0.4, + "logps_train/chosen": -66.86469268798828, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -71.82796478271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7614692449569702, + "rewards_train/margins": 1.49632728099823, + "rewards_train/rejected": -3.2577965259552, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -11.959256172180176, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -25.175161361694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9084256291389465, + "rewards_train/margins": 0.818465530872345, + "rewards_train/rejected": -1.7268911600112915, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -47.11042404174805, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -51.24140167236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2047924995422363, + "rewards_train/margins": 1.2630977630615234, + "rewards_train/rejected": -4.46789026260376, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -66.1747055053711, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -101.55384063720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7674705386161804, + "rewards_train/margins": 1.3879135251045227, + "rewards_train/rejected": -2.155384063720703, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -32.23445510864258, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -32.39204025268555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6234455108642578, + "rewards_train/margins": -0.2592414915561676, + "rewards_train/rejected": -0.3642040193080902, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -5.030879497528076, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -11.802146911621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24058794975280762, + "rewards_train/margins": 0.3615017533302307, + "rewards_train/rejected": -0.6020897030830383, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -13.712427139282227, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -31.909984588623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06499271839857101, + "rewards_train/margins": 2.494755692780018, + "rewards_train/rejected": -2.559748411178589, + "step": 1417 + }, + { + "epoch": 0.4, + "logps_train/chosen": -95.11405944824219, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -146.16305541992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5614059567451477, + "rewards_train/margins": 2.35489958524704, + "rewards_train/rejected": -2.9163055419921875, + "step": 1417 + }, + { + "epoch": 0.4, + "learning_rate": 8.608996717873578e-07, + "loss": 0.3547, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -123.85877227783203, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -220.54151916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3358772993087769, + "rewards_train/margins": 7.818274617195129, + "rewards_train/rejected": -9.154151916503906, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -159.5503692626953, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -151.82518005371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.055037021636963, + "rewards_train/margins": -0.8725190162658691, + "rewards_train/rejected": -3.1825180053710938, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -133.25784301757812, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -135.62551879882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.32578444480896, + "rewards_train/margins": -0.6632325649261475, + "rewards_train/rejected": -1.6625518798828125, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -21.661487579345703, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -11.890202522277832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8598987460136414, + "rewards_train/margins": -0.08962845802307129, + "rewards_train/rejected": -0.7702702879905701, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -126.61280059814453, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -162.339599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36128005385398865, + "rewards_train/margins": 3.3226799070835114, + "rewards_train/rejected": -3.6839599609375, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -41.055519104003906, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -45.165557861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5055519342422485, + "rewards_train/margins": 1.073503851890564, + "rewards_train/rejected": -2.5790557861328125, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -47.392860412597656, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -44.32272720336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9392860531806946, + "rewards_train/margins": 1.1304866671562195, + "rewards_train/rejected": -2.069772720336914, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -10.317863464355469, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -10.382529258728027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4724113643169403, + "rewards_train/margins": 0.11271658539772034, + "rewards_train/rejected": -0.5851279497146606, + "step": 1418 + }, + { + "epoch": 0.4, + "logps_train/chosen": -28.72591209411621, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -138.14859008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9975911974906921, + "rewards_train/margins": 1.9672678112983704, + "rewards_train/rejected": -2.9648590087890625, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -38.87926483154297, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -32.63642883300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5879265069961548, + "rewards_train/margins": 0.0007164478302001953, + "rewards_train/rejected": -1.588642954826355, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -5.478918075561523, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -35.1981315612793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09789180755615234, + "rewards_train/margins": 2.421921491622925, + "rewards_train/rejected": -2.519813299179077, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -21.06591033935547, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -19.907194137573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6253410577774048, + "rewards_train/margins": 0.7122533321380615, + "rewards_train/rejected": -1.3375943899154663, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -23.085155487060547, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -23.608428955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1772655248641968, + "rewards_train/margins": 0.23982739448547363, + "rewards_train/rejected": -1.4170929193496704, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -240.0987548828125, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -224.52987670898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.109875679016113, + "rewards_train/margins": 1.0431122779846191, + "rewards_train/rejected": -7.152987957000732, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -76.05694580078125, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -157.58995056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1056946516036987, + "rewards_train/margins": 4.453300595283508, + "rewards_train/rejected": -5.558995246887207, + "step": 1419 + }, + { + "epoch": 0.4, + "logps_train/chosen": -1.218963861465454, + "logps_train/ref_chosen": -0.93359375, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -24.060731887817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02853701077401638, + "rewards_train/margins": 1.3212862256914377, + "rewards_train/rejected": -1.349823236465454, + "step": 1419 + }, + { + "epoch": 0.4, + "learning_rate": 8.582803291895757e-07, + "loss": 0.4206, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -22.37807273864746, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -20.776769638061523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4128073453903198, + "rewards_train/margins": 0.39768218994140625, + "rewards_train/rejected": -1.810489535331726, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -114.01311492919922, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -182.07916259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5513114929199219, + "rewards_train/margins": 1.7566049098968506, + "rewards_train/rejected": -2.3079164028167725, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -15.994302749633789, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -46.441917419433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2728677988052368, + "rewards_train/margins": 2.4525738954544067, + "rewards_train/rejected": -3.7254416942596436, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -99.95345306396484, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -122.96942901611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9453453421592712, + "rewards_train/margins": 0.801597535610199, + "rewards_train/rejected": -1.7469428777694702, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -25.059267044067383, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -44.28885269165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9934266805648804, + "rewards_train/margins": 0.6104587316513062, + "rewards_train/rejected": -2.6038854122161865, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -55.449764251708984, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -21.38148307800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7699764370918274, + "rewards_train/margins": 0.8056718707084656, + "rewards_train/rejected": -1.575648307800293, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -104.8327407836914, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -50.5, + "logps_train/rejected": -95.8823013305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3832740783691406, + "rewards_train/margins": 3.154956340789795, + "rewards_train/rejected": -4.5382304191589355, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -109.1187744140625, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -233.22142028808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7618775367736816, + "rewards_train/margins": 7.660264492034912, + "rewards_train/rejected": -10.422142028808594, + "step": 1420 + }, + { + "epoch": 0.4, + "logps_train/chosen": -131.51014709472656, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -135.71343994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3510148525238037, + "rewards_train/margins": 0.8703291416168213, + "rewards_train/rejected": -4.221343994140625, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -28.421493530273438, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -23.16817855834961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.443711996078491, + "rewards_train/margins": -0.3518941402435303, + "rewards_train/rejected": -2.091817855834961, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -84.48129272460938, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -90.05799865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8981293439865112, + "rewards_train/margins": 3.45767080783844, + "rewards_train/rejected": -5.355800151824951, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -116.83484649658203, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -144.43679809570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7334847450256348, + "rewards_train/margins": 2.810194969177246, + "rewards_train/rejected": -5.543679714202881, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -0.6600969433784485, + "logps_train/ref_chosen": -0.1787109375, + "logps_train/ref_rejected": -0.1787109375, + "logps_train/rejected": -0.7224218249320984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04813859984278679, + "rewards_train/margins": 0.00623248890042305, + "rewards_train/rejected": -0.05437108874320984, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -31.779634475708008, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -13.536368370056152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7529634833335876, + "rewards_train/margins": -0.12745165824890137, + "rewards_train/rejected": -0.6255118250846863, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -16.78485870361328, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -15.625170707702637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15348587930202484, + "rewards_train/margins": 1.2441874891519547, + "rewards_train/rejected": -1.3976733684539795, + "step": 1421 + }, + { + "epoch": 0.4, + "logps_train/chosen": -102.18644714355469, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -195.0150146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5186446905136108, + "rewards_train/margins": 6.982857346534729, + "rewards_train/rejected": -8.50150203704834, + "step": 1421 + }, + { + "epoch": 0.4, + "learning_rate": 8.556619784792326e-07, + "loss": 0.3126, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -65.184814453125, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -68.00714111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5815185904502869, + "rewards_train/margins": 1.107232689857483, + "rewards_train/rejected": -0.525714099407196, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -18.904680252075195, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -38.718955993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4779680967330933, + "rewards_train/margins": 1.5376776456832886, + "rewards_train/rejected": -3.015645742416382, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -97.6854248046875, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -116.78761291503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.868542492389679, + "rewards_train/margins": 1.2602187991142273, + "rewards_train/rejected": -2.1287612915039062, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -107.09335327148438, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -83.79795837402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.109335422515869, + "rewards_train/margins": -0.5295395851135254, + "rewards_train/rejected": -2.5797958374023438, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -92.93861389160156, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -186.379638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7938613891601562, + "rewards_train/margins": 4.594102382659912, + "rewards_train/rejected": -6.387963771820068, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -33.83885192871094, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -50.87456512451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7463852167129517, + "rewards_train/margins": 2.991071343421936, + "rewards_train/rejected": -3.7374565601348877, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -5.428289413452148, + "logps_train/ref_chosen": -1.2890625, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -6.8170647621154785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4139226973056793, + "rewards_train/margins": -0.05409121513366699, + "rewards_train/rejected": -0.35983148217201233, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -8.724494934082031, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -21.45784568786621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3974494934082031, + "rewards_train/margins": 1.032710075378418, + "rewards_train/rejected": -1.430159568786621, + "step": 1422 + }, + { + "epoch": 0.4, + "logps_train/chosen": -6.5222320556640625, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -29.51126480102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3631606996059418, + "rewards_train/margins": 0.5004657804965973, + "rewards_train/rejected": -0.8636264801025391, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -42.73823547363281, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -156.1427001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3238236904144287, + "rewards_train/margins": 4.240446329116821, + "rewards_train/rejected": -6.56427001953125, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -35.900081634521484, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -33.917518615722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7150081992149353, + "rewards_train/margins": 0.5892437100410461, + "rewards_train/rejected": -1.3042519092559814, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -121.28340148925781, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -176.81900024414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17165985703468323, + "rewards_train/margins": 3.953560024499893, + "rewards_train/rejected": -3.78190016746521, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -202.68423461914062, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -247.17852783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.268423557281494, + "rewards_train/margins": 4.6494293212890625, + "rewards_train/rejected": -6.917852878570557, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -1.7625577449798584, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -10.109378814697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12686923146247864, + "rewards_train/margins": 0.10655711218714714, + "rewards_train/rejected": 0.020312119275331497, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -19.387073516845703, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -1.9140625, + "logps_train/rejected": -13.753005027770996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5387073755264282, + "rewards_train/margins": 0.6451869010925293, + "rewards_train/rejected": -1.1838942766189575, + "step": 1423 + }, + { + "epoch": 0.4, + "logps_train/chosen": -121.13510131835938, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -118.91304779052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3135101795196533, + "rewards_train/margins": 2.527794599533081, + "rewards_train/rejected": -3.8413047790527344, + "step": 1423 + }, + { + "epoch": 0.4, + "learning_rate": 8.530446379820069e-07, + "loss": 0.3066, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -94.03208923339844, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -148.18191528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7532089948654175, + "rewards_train/margins": 2.564982533454895, + "rewards_train/rejected": -4.3181915283203125, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -25.425182342529297, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -18.131328582763672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8612682819366455, + "rewards_train/margins": -0.9731354117393494, + "rewards_train/rejected": -0.8881328701972961, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -88.05398559570312, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -206.92579650878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.05539870262146, + "rewards_train/margins": 2.137181043624878, + "rewards_train/rejected": -5.192579746246338, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -302.4735107421875, + "logps_train/ref_chosen": -216.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -263.15673828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.647351264953613, + "rewards_train/margins": 0.16832256317138672, + "rewards_train/rejected": -8.815673828125, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -26.187232971191406, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -68.21156311035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09372329711914062, + "rewards_train/margins": 1.7274330854415894, + "rewards_train/rejected": -1.82115638256073, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -134.2266845703125, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -95.64675903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3226684629917145, + "rewards_train/margins": 0.892007440328598, + "rewards_train/rejected": -1.2146759033203125, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -21.33535385131836, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -11.751605987548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20853538811206818, + "rewards_train/margins": 0.22600020468235016, + "rewards_train/rejected": -0.43453559279441833, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -55.104862213134766, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -92.12994384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.072986364364624, + "rewards_train/margins": 2.540008306503296, + "rewards_train/rejected": -5.61299467086792, + "step": 1424 + }, + { + "epoch": 0.4, + "logps_train/chosen": -31.126304626464844, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -38.82741165161133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8095054626464844, + "rewards_train/margins": 0.7419857978820801, + "rewards_train/rejected": -3.5514912605285645, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -15.193164825439453, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -40.07341766357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8786914944648743, + "rewards_train/margins": 1.4286503195762634, + "rewards_train/rejected": -2.3073418140411377, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -28.856693267822266, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -29.436588287353516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.123169422149658, + "rewards_train/margins": -0.06701064109802246, + "rewards_train/rejected": -2.0561587810516357, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -205.9153594970703, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -229.52142333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.891536235809326, + "rewards_train/margins": 2.6606059074401855, + "rewards_train/rejected": -8.552142143249512, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -82.00631713867188, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -82.16341400146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7506316900253296, + "rewards_train/margins": 0.015709757804870605, + "rewards_train/rejected": -1.7663414478302002, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -14.535551071166992, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -21.4645938873291, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.050430178642273, + "rewards_train/margins": 0.7335292100906372, + "rewards_train/rejected": -1.7839593887329102, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -203.86752319335938, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -233.4249267578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.086752414703369, + "rewards_train/margins": -0.04425954818725586, + "rewards_train/rejected": -6.042492866516113, + "step": 1425 + }, + { + "epoch": 0.4, + "logps_train/chosen": -171.32473754882812, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -160.0475616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.682473659515381, + "rewards_train/margins": 0.27228260040283203, + "rewards_train/rejected": -5.954756259918213, + "step": 1425 + }, + { + "epoch": 0.4, + "learning_rate": 8.504283260165075e-07, + "loss": 0.4387, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -198.82725524902344, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -228.75942993164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.982725620269775, + "rewards_train/margins": 2.743217945098877, + "rewards_train/rejected": -10.725943565368652, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -164.9891357421875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -194.95468139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.898913621902466, + "rewards_train/margins": 4.346554517745972, + "rewards_train/rejected": -8.245468139648438, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -25.005762100219727, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -29.874441146850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8318262100219727, + "rewards_train/margins": 0.7087428569793701, + "rewards_train/rejected": -2.5405690670013428, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -12.666194915771484, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -35.52874755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26036950945854187, + "rewards_train/margins": 1.005005270242691, + "rewards_train/rejected": -1.265374779701233, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -24.07009506225586, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -85.89326477050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.469509482383728, + "rewards_train/margins": 0.7448171377182007, + "rewards_train/rejected": -2.2143266201019287, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -12.016257286071777, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -49.16460037231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11087427288293839, + "rewards_train/margins": 1.1023342981934547, + "rewards_train/rejected": -0.9914600253105164, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -132.37225341796875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -147.93997192382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.937225341796875, + "rewards_train/margins": 1.556771993637085, + "rewards_train/rejected": -3.49399733543396, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -13.892416954040527, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -22.38726806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8579916954040527, + "rewards_train/margins": 0.7838600873947144, + "rewards_train/rejected": -1.641851782798767, + "step": 1426 + }, + { + "epoch": 0.4, + "logps_train/chosen": -115.86874389648438, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -113.48358917236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.336874485015869, + "rewards_train/margins": 0.461484432220459, + "rewards_train/rejected": -3.798358917236328, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -36.26954650878906, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -36.72130584716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.539454698562622, + "rewards_train/margins": -0.4923241138458252, + "rewards_train/rejected": -2.047130584716797, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -128.38943481445312, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -131.14463806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1889435052871704, + "rewards_train/margins": 1.9755202531814575, + "rewards_train/rejected": -3.164463758468628, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -190.485107421875, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -255.9140167236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.148510694503784, + "rewards_train/margins": 6.342890977859497, + "rewards_train/rejected": -9.491401672363281, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -88.60974884033203, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -173.73187255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.960974931716919, + "rewards_train/margins": 3.212212324142456, + "rewards_train/rejected": -5.173187255859375, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -15.649991989135742, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -29.13273811340332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.14937424659729, + "rewards_train/margins": 1.1638996601104736, + "rewards_train/rejected": -2.3132739067077637, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -68.43187713623047, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -35.6016960144043, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.36818790435791, + "rewards_train/margins": -2.620518207550049, + "rewards_train/rejected": -2.7476696968078613, + "step": 1427 + }, + { + "epoch": 0.4, + "logps_train/chosen": -6.632145881652832, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -40.198638916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40852710604667664, + "rewards_train/margins": 2.342586785554886, + "rewards_train/rejected": -2.7511138916015625, + "step": 1427 + }, + { + "epoch": 0.4, + "learning_rate": 8.478130608941434e-07, + "loss": 0.4198, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -87.38567352294922, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -83.07965087890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.388567328453064, + "rewards_train/margins": -1.2806022390723228, + "rewards_train/rejected": -0.10796508938074112, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -12.708823204040527, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -31.20665740966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16661767661571503, + "rewards_train/margins": 0.6622834354639053, + "rewards_train/rejected": -0.4956657588481903, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -34.349090576171875, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -13.485090255737305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8099091053009033, + "rewards_train/margins": -0.6614000797271729, + "rewards_train/rejected": -1.1485090255737305, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -22.407766342163086, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -23.382719039916992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2532767057418823, + "rewards_train/margins": -0.040004730224609375, + "rewards_train/rejected": -1.213271975517273, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -179.9927978515625, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -175.1817626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2992799282073975, + "rewards_train/margins": 2.618896245956421, + "rewards_train/rejected": -4.918176174163818, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -175.16500854492188, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -164.12184143066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.8665008544921875, + "rewards_train/margins": 1.3956832885742188, + "rewards_train/rejected": -8.262184143066406, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -94.76005554199219, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -73.18118286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7260055541992188, + "rewards_train/margins": 0.3171128034591675, + "rewards_train/rejected": -1.0431183576583862, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -95.07449340820312, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -49.60496520996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.657449245452881, + "rewards_train/margins": -1.5719525814056396, + "rewards_train/rejected": -3.085496664047241, + "step": 1428 + }, + { + "epoch": 0.4, + "logps_train/chosen": -168.5017547607422, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -39.832794189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5501755475997925, + "rewards_train/margins": 0.5831040143966675, + "rewards_train/rejected": -2.13327956199646, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -193.7415008544922, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -141.66690063476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.824150085449219, + "rewards_train/margins": -2.4074597358703613, + "rewards_train/rejected": -4.416690349578857, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -19.444046020507812, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -20.886077880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41315460205078125, + "rewards_train/margins": 0.4692031741142273, + "rewards_train/rejected": -0.8823577761650085, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -110.72322845458984, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -143.12075805664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.672322988510132, + "rewards_train/margins": 0.6397528648376465, + "rewards_train/rejected": -3.3120758533477783, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -30.573286056518555, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -56.99390411376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11982860416173935, + "rewards_train/margins": 2.7045618072152138, + "rewards_train/rejected": -2.824390411376953, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -149.43148803710938, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -240.08062744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.843148946762085, + "rewards_train/margins": 2.5649139881134033, + "rewards_train/rejected": -5.408062934875488, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -17.624624252319336, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -113.375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5874624252319336, + "rewards_train/margins": 1.4500377178192139, + "rewards_train/rejected": -2.0375001430511475, + "step": 1429 + }, + { + "epoch": 0.4, + "logps_train/chosen": -125.14209747314453, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -234.57373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.264209747314453, + "rewards_train/margins": 6.59316349029541, + "rewards_train/rejected": -8.857373237609863, + "step": 1429 + }, + { + "epoch": 0.4, + "learning_rate": 8.451988609189986e-07, + "loss": 0.6589, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -25.664888381958008, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -13.836042404174805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4477388858795166, + "rewards_train/margins": -0.7328846454620361, + "rewards_train/rejected": -0.7148542404174805, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -111.99592590332031, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -131.31887817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0495927333831787, + "rewards_train/margins": 0.28229522705078125, + "rewards_train/rejected": -3.33188796043396, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -74.44280242919922, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -90.27798461914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8442802429199219, + "rewards_train/margins": 1.8085181713104248, + "rewards_train/rejected": -2.6527984142303467, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -156.90692138671875, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -168.86643981933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.040692329406738, + "rewards_train/margins": -0.25404834747314453, + "rewards_train/rejected": -4.786643981933594, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -39.48411560058594, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -41.20874786376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2734115719795227, + "rewards_train/margins": 0.6224632263183594, + "rewards_train/rejected": -0.8958747982978821, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -27.74504852294922, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -57.11451721191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3120049238204956, + "rewards_train/margins": 0.9244469404220581, + "rewards_train/rejected": -2.2364518642425537, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -80.02305603027344, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -94.1866683959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4023056030273438, + "rewards_train/margins": 2.616361141204834, + "rewards_train/rejected": -4.018666744232178, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -174.3304443359375, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -176.2317352294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.133044719696045, + "rewards_train/margins": 2.940128803253174, + "rewards_train/rejected": -7.073173522949219, + "step": 1430 + }, + { + "epoch": 0.4, + "logps_train/chosen": -38.63966369628906, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -28.46590232849121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8389663696289062, + "rewards_train/margins": 0.007623910903930664, + "rewards_train/rejected": -1.846590280532837, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -197.78536987304688, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -224.70343017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.428537368774414, + "rewards_train/margins": 1.4418058395385742, + "rewards_train/rejected": -9.870343208312988, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -96.74603271484375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -97.552490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.524603247642517, + "rewards_train/margins": 0.08064579963684082, + "rewards_train/rejected": -1.605249047279358, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -65.64576721191406, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -99.45027160644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7645766735076904, + "rewards_train/margins": 0.3054506778717041, + "rewards_train/rejected": -4.0700273513793945, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -85.1208724975586, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -118.13902282714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3120872676372528, + "rewards_train/margins": 0.8518150150775909, + "rewards_train/rejected": -1.1639022827148438, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -59.22904968261719, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -52.362648010253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.960405111312866, + "rewards_train/margins": -0.4116401672363281, + "rewards_train/rejected": -2.548764944076538, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -5.159893035888672, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -9.224357604980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16598930954933167, + "rewards_train/margins": 0.4830089509487152, + "rewards_train/rejected": -0.6489982604980469, + "step": 1431 + }, + { + "epoch": 0.4, + "logps_train/chosen": -79.13673400878906, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -135.54006958007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.738673448562622, + "rewards_train/margins": 3.1153337955474854, + "rewards_train/rejected": -4.854007244110107, + "step": 1431 + }, + { + "epoch": 0.4, + "learning_rate": 8.42585744387701e-07, + "loss": 0.4662, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -10.884541511535645, + "logps_train/ref_chosen": -3.453125, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -55.825416564941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7431416511535645, + "rewards_train/margins": 2.1769001483917236, + "rewards_train/rejected": -2.920041799545288, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -97.1189193725586, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -187.93359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6118919253349304, + "rewards_train/margins": 7.731468021869659, + "rewards_train/rejected": -8.34335994720459, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -114.22930145263672, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -187.1502685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1729302406311035, + "rewards_train/margins": 3.3420968055725098, + "rewards_train/rejected": -5.515027046203613, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -167.29151916503906, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -163.9132080078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.529151916503906, + "rewards_train/margins": -0.6378309726715088, + "rewards_train/rejected": -3.8913209438323975, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -14.352323532104492, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -21.135311126708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6227323412895203, + "rewards_train/margins": 0.534548819065094, + "rewards_train/rejected": -1.1572811603546143, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -146.7540283203125, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -186.26536560058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8754029273986816, + "rewards_train/margins": 4.501133918762207, + "rewards_train/rejected": -7.376536846160889, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -11.743621826171875, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -21.413864135742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25561219453811646, + "rewards_train/margins": 1.2763991951942444, + "rewards_train/rejected": -1.5320113897323608, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -7.57239294052124, + "logps_train/ref_chosen": -1.25, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -11.624284744262695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6322392821311951, + "rewards_train/margins": 0.030189216136932373, + "rewards_train/rejected": -0.6624284982681274, + "step": 1432 + }, + { + "epoch": 0.4, + "logps_train/chosen": -130.0410614013672, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -143.5235137939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4541062116622925, + "rewards_train/margins": 0.44824516773223877, + "rewards_train/rejected": -1.9023513793945312, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -6.842432975769043, + "logps_train/ref_chosen": -1.765625, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -12.384992599487305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5076808333396912, + "rewards_train/margins": 0.35269343852996826, + "rewards_train/rejected": -0.8603742718696594, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -10.152284622192383, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -54.85163497924805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5027284622192383, + "rewards_train/margins": 3.1199350357055664, + "rewards_train/rejected": -3.6226634979248047, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -76.29621124267578, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -101.89070892333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5796210765838623, + "rewards_train/margins": 0.6594498157501221, + "rewards_train/rejected": -3.2390708923339844, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -17.010761260986328, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -0.9765625, + "logps_train/rejected": -13.471443176269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2917011976242065, + "rewards_train/margins": -0.0422130823135376, + "rewards_train/rejected": -1.249488115310669, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -100.30805969238281, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -100.11881256103516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5808059573173523, + "rewards_train/margins": -0.018924713134765625, + "rewards_train/rejected": -0.5618812441825867, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -30.196334838867188, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -38.30487823486328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3696335554122925, + "rewards_train/margins": -0.06414568424224854, + "rewards_train/rejected": -1.305487871170044, + "step": 1433 + }, + { + "epoch": 0.4, + "logps_train/chosen": -193.54257202148438, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -187.2286376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7542572021484375, + "rewards_train/margins": 2.5686068534851074, + "rewards_train/rejected": -4.322864055633545, + "step": 1433 + }, + { + "epoch": 0.4, + "learning_rate": 8.399737295892956e-07, + "loss": 0.394, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -24.222265243530273, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -46.31199264526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4472265243530273, + "rewards_train/margins": 1.3714728355407715, + "rewards_train/rejected": -2.818699359893799, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -9.892312049865723, + "logps_train/ref_chosen": -1.140625, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -33.76930236816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8751687407493591, + "rewards_train/margins": 1.1517615914344788, + "rewards_train/rejected": -2.026930332183838, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -179.52859497070312, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -156.88153076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.652859687805176, + "rewards_train/margins": 1.2852935791015625, + "rewards_train/rejected": -5.938153266906738, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -101.05255889892578, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -106.88377380371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.505255937576294, + "rewards_train/margins": -0.3168785572052002, + "rewards_train/rejected": -2.1883773803710938, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -33.94926452636719, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -86.92850494384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.194926455616951, + "rewards_train/margins": 2.772923991084099, + "rewards_train/rejected": -2.96785044670105, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -157.1407928466797, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -216.26699829101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7140792608261108, + "rewards_train/margins": 6.012620568275452, + "rewards_train/rejected": -7.7266998291015625, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -100.96949768066406, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -170.60897827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.19694983959198, + "rewards_train/margins": 5.513948082923889, + "rewards_train/rejected": -6.710897922515869, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -107.09042358398438, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -159.95791625976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.309042364358902, + "rewards_train/margins": -0.21325073391199112, + "rewards_train/rejected": -0.09579163044691086, + "step": 1434 + }, + { + "epoch": 0.4, + "logps_train/chosen": -9.006577491760254, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -17.56553840637207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3555922508239746, + "rewards_train/margins": 1.6683961153030396, + "rewards_train/rejected": -1.312803864479065, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -12.195757865905762, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -0.83203125, + "logps_train/rejected": -24.40302276611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.019575834274292, + "rewards_train/margins": 1.3375234603881836, + "rewards_train/rejected": -2.3570992946624756, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -16.202880859375, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -21.9032039642334, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30778810381889343, + "rewards_train/margins": 0.6575323045253754, + "rewards_train/rejected": -0.9653204083442688, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -21.31202507019043, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -21.862037658691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8374525308609009, + "rewards_train/margins": 0.24250125885009766, + "rewards_train/rejected": -1.0799537897109985, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -33.098514556884766, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -128.47235107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9098514914512634, + "rewards_train/margins": 4.087383806705475, + "rewards_train/rejected": -4.997235298156738, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -41.97938537597656, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -67.41780853271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6770614981651306, + "rewards_train/margins": 1.793842375278473, + "rewards_train/rejected": -1.1167808771133423, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -166.0582275390625, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -214.67233276367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20582275092601776, + "rewards_train/margins": 4.561410620808601, + "rewards_train/rejected": -4.767233371734619, + "step": 1435 + }, + { + "epoch": 0.4, + "logps_train/chosen": -7.3757476806640625, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -37.48860168457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3766372799873352, + "rewards_train/margins": 1.1847229599952698, + "rewards_train/rejected": -1.561360239982605, + "step": 1435 + }, + { + "epoch": 0.4, + "learning_rate": 8.373628348051163e-07, + "loss": 0.2707, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -91.46697998046875, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -152.89938354492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5466980338096619, + "rewards_train/margins": 4.093240320682526, + "rewards_train/rejected": -4.6399383544921875, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -12.299216270446777, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -27.625, + "logps_train/rejected": -71.09013366699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3924216330051422, + "rewards_train/margins": 3.954091638326645, + "rewards_train/rejected": -4.346513271331787, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -0.8208505511283875, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -1.3046875, + "logps_train/rejected": -5.561649799346924, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10463368892669678, + "rewards_train/margins": 0.5303299129009247, + "rewards_train/rejected": -0.4256962239742279, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -94.60786437988281, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -129.91604614257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.660786509513855, + "rewards_train/margins": 0.5308181047439575, + "rewards_train/rejected": -2.1916046142578125, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -122.64582824707031, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -217.413330078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.014582872390747, + "rewards_train/margins": 5.926750421524048, + "rewards_train/rejected": -6.941333293914795, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -26.21889305114746, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -22.059612274169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.771889328956604, + "rewards_train/margins": 0.11375939846038818, + "rewards_train/rejected": -1.8856487274169922, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -28.022384643554688, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -34.765541076660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1897385120391846, + "rewards_train/margins": 0.036815643310546875, + "rewards_train/rejected": -1.2265541553497314, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -92.3380126953125, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -41.830650329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.633801221847534, + "rewards_train/margins": 0.4055137634277344, + "rewards_train/rejected": -3.0393149852752686, + "step": 1436 + }, + { + "epoch": 0.4, + "logps_train/chosen": -7.5965471267700195, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -1.3984375, + "logps_train/rejected": -11.179727554321289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3752797245979309, + "rewards_train/margins": 0.6028493046760559, + "rewards_train/rejected": -0.9781290292739868, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -121.70661163330078, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -121.16328430175781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5206611156463623, + "rewards_train/margins": -0.054332733154296875, + "rewards_train/rejected": -2.4663283824920654, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -125.15583801269531, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -154.87246704101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5155838131904602, + "rewards_train/margins": 6.071663081645966, + "rewards_train/rejected": -6.587246894836426, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -123.81425476074219, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -148.91812133789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2814254760742188, + "rewards_train/margins": 0.11038661003112793, + "rewards_train/rejected": -2.3918120861053467, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -18.059410095214844, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -45.162086486816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2809410095214844, + "rewards_train/margins": 1.8602676391601562, + "rewards_train/rejected": -3.1412086486816406, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -111.40423583984375, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -196.38914489746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.390423536300659, + "rewards_train/margins": 6.548490762710571, + "rewards_train/rejected": -8.93891429901123, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -101.11441040039062, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -103.30479431152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4614410400390625, + "rewards_train/margins": -0.08096146583557129, + "rewards_train/rejected": -2.380479574203491, + "step": 1437 + }, + { + "epoch": 0.4, + "logps_train/chosen": -39.96559143066406, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -41.62467956542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9715592861175537, + "rewards_train/margins": 0.09090876579284668, + "rewards_train/rejected": -3.0624680519104004, + "step": 1437 + }, + { + "epoch": 0.4, + "learning_rate": 8.347530783086592e-07, + "loss": 0.3823, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -13.31317138671875, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -38.08049392700195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.037567138671875, + "rewards_train/margins": 2.3392322063446045, + "rewards_train/rejected": -3.3767993450164795, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -138.73678588867188, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -134.7141571044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.873678684234619, + "rewards_train/margins": -0.10226297378540039, + "rewards_train/rejected": -3.7714157104492188, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -35.758384704589844, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -22.649654388427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.288338541984558, + "rewards_train/margins": 0.24225187301635742, + "rewards_train/rejected": -1.5305904150009155, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -104.66023254394531, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -218.75335693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2660233974456787, + "rewards_train/margins": 7.059312105178833, + "rewards_train/rejected": -9.325335502624512, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -153.574462890625, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -140.42037963867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.057446479797363, + "rewards_train/margins": -2.2154085636138916, + "rewards_train/rejected": -3.8420379161834717, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -115.21807861328125, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -203.744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02180786244571209, + "rewards_train/margins": 6.252606200054288, + "rewards_train/rejected": -6.2744140625, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -33.42832946777344, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -37.25, + "logps_train/rejected": -87.32644653320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7803329825401306, + "rewards_train/margins": 4.227311670780182, + "rewards_train/rejected": -5.0076446533203125, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -229.20693969726562, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -241.50694274902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.620694160461426, + "rewards_train/margins": 0.23000049591064453, + "rewards_train/rejected": -9.85069465637207, + "step": 1438 + }, + { + "epoch": 0.4, + "logps_train/chosen": -56.63554382324219, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -38.75796890258789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0135544538497925, + "rewards_train/margins": 1.4997423887252808, + "rewards_train/rejected": -2.5132968425750732, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -10.725617408752441, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -17.798158645629883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32256174087524414, + "rewards_train/margins": 0.810379147529602, + "rewards_train/rejected": -1.1329408884048462, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -43.64622497558594, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -57.083648681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6521224975585938, + "rewards_train/margins": 0.6562423706054688, + "rewards_train/rejected": -3.3083648681640625, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -79.93896484375, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -76.36566925048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8438966274261475, + "rewards_train/margins": 1.1176702976226807, + "rewards_train/rejected": -3.961566925048828, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -4.030515193939209, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -20.78426170349121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006323480512946844, + "rewards_train/margins": 0.3097496568225324, + "rewards_train/rejected": -0.30342617630958557, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -7.217978477478027, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -1.4609375, + "logps_train/rejected": -18.012704849243164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01257715281099081, + "rewards_train/margins": 1.6677539115771651, + "rewards_train/rejected": -1.6551767587661743, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -198.6396484375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -223.6602783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.363965034484863, + "rewards_train/margins": 0.80206298828125, + "rewards_train/rejected": -8.166028022766113, + "step": 1439 + }, + { + "epoch": 0.4, + "logps_train/chosen": -162.63931274414062, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -175.71533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.36393141746521, + "rewards_train/margins": 0.40760183334350586, + "rewards_train/rejected": -3.771533250808716, + "step": 1439 + }, + { + "epoch": 0.4, + "learning_rate": 8.321444783654523e-07, + "loss": 0.4506, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -26.807992935180664, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -39.294490814208984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8307992815971375, + "rewards_train/margins": -0.32635021209716797, + "rewards_train/rejected": -0.5044490694999695, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -155.70895385742188, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -259.226806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.520895481109619, + "rewards_train/margins": 6.201785564422607, + "rewards_train/rejected": -10.722681045532227, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -87.72114562988281, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -170.14698791503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3221147060394287, + "rewards_train/margins": 5.992584466934204, + "rewards_train/rejected": -8.314699172973633, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -99.67813110351562, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -72.78496551513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6678130626678467, + "rewards_train/margins": 2.710683584213257, + "rewards_train/rejected": -5.3784966468811035, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -32.065608978271484, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -36.242713928222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9190608859062195, + "rewards_train/margins": 1.248960554599762, + "rewards_train/rejected": -2.1680214405059814, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -30.540599822998047, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -46.99468231201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7040599584579468, + "rewards_train/margins": 1.5141583681106567, + "rewards_train/rejected": -3.2182183265686035, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -16.138166427612305, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -3.28125, + "logps_train/rejected": -15.0439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8950666785240173, + "rewards_train/margins": 0.28120285272598267, + "rewards_train/rejected": -1.17626953125, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -98.81668090820312, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -110.68836975097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03166809305548668, + "rewards_train/margins": 2.9371689297258854, + "rewards_train/rejected": -2.968837022781372, + "step": 1440 + }, + { + "epoch": 0.4, + "logps_train/chosen": -22.406309127807617, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -41.41679382324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4593809843063354, + "rewards_train/margins": 0.7822984457015991, + "rewards_train/rejected": -2.2416794300079346, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -83.93136596679688, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -59.641231536865234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7431366443634033, + "rewards_train/margins": -0.20401346683502197, + "rewards_train/rejected": -1.5391231775283813, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -76.06391143798828, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -137.3204345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.256391167640686, + "rewards_train/margins": 1.9756523370742798, + "rewards_train/rejected": -3.232043504714966, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -34.29785919189453, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -35.95362091064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8141610622406006, + "rewards_train/margins": -0.27504897117614746, + "rewards_train/rejected": -2.539112091064453, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -151.49661254882812, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -190.82000732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6496613025665283, + "rewards_train/margins": 4.532339334487915, + "rewards_train/rejected": -6.182000637054443, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -25.787626266479492, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -18.217166900634766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.041262626647949, + "rewards_train/margins": -1.6882959306240082, + "rewards_train/rejected": -0.35296669602394104, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -59.33257293701172, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -47.79388427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6582573652267456, + "rewards_train/margins": 0.2461310625076294, + "rewards_train/rejected": -1.904388427734375, + "step": 1441 + }, + { + "epoch": 0.4, + "logps_train/chosen": -53.29736328125, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -134.09432983398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05473632737994194, + "rewards_train/margins": 2.854696799069643, + "rewards_train/rejected": -2.909433126449585, + "step": 1441 + }, + { + "epoch": 0.4, + "learning_rate": 8.295370532329295e-07, + "loss": 0.4158, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -83.3887710571289, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -83.46562194824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6888771057128906, + "rewards_train/margins": 0.00768512487411499, + "rewards_train/rejected": -0.6965622305870056, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -20.99199676513672, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -32.52523422241211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7679497003555298, + "rewards_train/margins": 1.028323769569397, + "rewards_train/rejected": -1.7962734699249268, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -198.08370971679688, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -156.88055419921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.808371067047119, + "rewards_train/margins": -1.0703153610229492, + "rewards_train/rejected": -5.73805570602417, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -24.32720184326172, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -37.456417083740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.057720184326171875, + "rewards_train/margins": 2.881671667098999, + "rewards_train/rejected": -2.939391851425171, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -26.93970489501953, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -33.730934143066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4564704895019531, + "rewards_train/margins": 1.3041229248046875, + "rewards_train/rejected": -1.7605934143066406, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -29.13861846923828, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -20.601482391357422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0201117992401123, + "rewards_train/margins": -0.4005885124206543, + "rewards_train/rejected": -1.619523286819458, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -194.56072998046875, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -176.45907592773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8560731410980225, + "rewards_train/margins": 4.739834547042847, + "rewards_train/rejected": -7.595907688140869, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -58.416587829589844, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -193.9338836669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4416587352752686, + "rewards_train/margins": 5.501729726791382, + "rewards_train/rejected": -7.94338846206665, + "step": 1442 + }, + { + "epoch": 0.4, + "logps_train/chosen": -121.485107421875, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -176.20700073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.298510789871216, + "rewards_train/margins": 5.622189283370972, + "rewards_train/rejected": -7.9207000732421875, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -152.24114990234375, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -218.22174072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4741151332855225, + "rewards_train/margins": 6.898059129714966, + "rewards_train/rejected": -9.372174263000488, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -22.88399887084961, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -35.898887634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9696499109268188, + "rewards_train/margins": 0.7577388286590576, + "rewards_train/rejected": -1.7273887395858765, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -31.317916870117188, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -63.10219955444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9505417346954346, + "rewards_train/margins": 0.05967831611633301, + "rewards_train/rejected": -2.0102200508117676, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -103.21174621582031, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -130.03475952148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.521174669265747, + "rewards_train/margins": 2.6823012828826904, + "rewards_train/rejected": -4.2034759521484375, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -255.59739685058594, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -221.9793243408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.859740257263184, + "rewards_train/margins": -1.7118072509765625, + "rewards_train/rejected": -10.147933006286621, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -203.84075927734375, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -171.6119384765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.984076023101807, + "rewards_train/margins": -0.172882080078125, + "rewards_train/rejected": -5.811193943023682, + "step": 1443 + }, + { + "epoch": 0.4, + "logps_train/chosen": -135.05654907226562, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -219.7202606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9556548595428467, + "rewards_train/margins": 5.216371774673462, + "rewards_train/rejected": -8.172026634216309, + "step": 1443 + }, + { + "epoch": 0.4, + "learning_rate": 8.269308211603021e-07, + "loss": 0.4606, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -117.13218688964844, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -267.6932373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.313218832015991, + "rewards_train/margins": 11.156105279922485, + "rewards_train/rejected": -13.469324111938477, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -60.43419647216797, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -89.87097930908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39341965317726135, + "rewards_train/margins": 2.5936783254146576, + "rewards_train/rejected": -2.987097978591919, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -81.87950134277344, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -115.04148864746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7879501581192017, + "rewards_train/margins": 1.6161986589431763, + "rewards_train/rejected": -2.404148817062378, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -175.07974243164062, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -233.36062622070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7079742550849915, + "rewards_train/margins": 3.628088653087616, + "rewards_train/rejected": -4.336062908172607, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -1.7120399475097656, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -19.82074546813965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03817100450396538, + "rewards_train/margins": 1.1889955513179302, + "rewards_train/rejected": -1.1508245468139648, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -216.22164916992188, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -216.00936889648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.522165060043335, + "rewards_train/margins": 3.878772020339966, + "rewards_train/rejected": -6.400937080383301, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -120.60285186767578, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -146.34197998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0102852582931519, + "rewards_train/margins": 3.6739126443862915, + "rewards_train/rejected": -4.684197902679443, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -104.5323715209961, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -171.95741271972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6532371640205383, + "rewards_train/margins": 4.09250420331955, + "rewards_train/rejected": -4.745741367340088, + "step": 1444 + }, + { + "epoch": 0.4, + "logps_train/chosen": -152.18368530273438, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -195.88613891601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9683685302734375, + "rewards_train/margins": 2.1202454566955566, + "rewards_train/rejected": -6.088613986968994, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -110.05508422851562, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -174.47491455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0055084228515625, + "rewards_train/margins": 3.691983222961426, + "rewards_train/rejected": -4.697491645812988, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -141.656005859375, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -127.31892395019531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.565600872039795, + "rewards_train/margins": -2.233708381652832, + "rewards_train/rejected": -2.331892490386963, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -124.1921157836914, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -138.65985107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5192115902900696, + "rewards_train/margins": 2.0967735648155212, + "rewards_train/rejected": -2.615985155105591, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -178.65196228027344, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -188.31008911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.965196371078491, + "rewards_train/margins": 1.8658125400543213, + "rewards_train/rejected": -5.8310089111328125, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -76.9300765991211, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -76.41468048095703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.3069923520088196, + "rewards_train/margins": -0.051539599895477295, + "rewards_train/rejected": 0.3585319519042969, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -40.976051330566406, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -43.48872756958008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6226052045822144, + "rewards_train/margins": 1.6137675046920776, + "rewards_train/rejected": -3.236372709274292, + "step": 1445 + }, + { + "epoch": 0.4, + "logps_train/chosen": -8.286760330200195, + "logps_train/ref_chosen": -1.015625, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -10.299888610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7271135449409485, + "rewards_train/margins": 0.06381285190582275, + "rewards_train/rejected": -0.7909263968467712, + "step": 1445 + }, + { + "epoch": 0.4, + "learning_rate": 8.243258003884319e-07, + "loss": 0.3065, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -48.878448486328125, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -12.531781196594238, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13784484565258026, + "rewards_train/margins": 0.5028332620859146, + "rewards_train/rejected": -0.6406781077384949, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -36.3111572265625, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -45.711936950683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6311157941818237, + "rewards_train/margins": 0.9150780439376831, + "rewards_train/rejected": -2.546193838119507, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -62.399444580078125, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -39.927921295166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2600555419921875, + "rewards_train/margins": 2.240347743034363, + "rewards_train/rejected": -1.9802922010421753, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -7.19680118560791, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -31.81212043762207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.357180118560791, + "rewards_train/margins": 0.536531925201416, + "rewards_train/rejected": -0.893712043762207, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -89.09171295166016, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -123.89582061767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6591713428497314, + "rewards_train/margins": 0.8804106712341309, + "rewards_train/rejected": -2.5395820140838623, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -20.59495735168457, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -38.269859313964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4876207113265991, + "rewards_train/margins": -0.048134803771972656, + "rewards_train/rejected": -1.4394859075546265, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -18.397647857666016, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -19.13844108581543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9710147976875305, + "rewards_train/margins": 0.23657935857772827, + "rewards_train/rejected": -1.2075941562652588, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -113.24415588378906, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -206.19912719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9244155883789062, + "rewards_train/margins": 5.695497035980225, + "rewards_train/rejected": -6.619912624359131, + "step": 1446 + }, + { + "epoch": 0.4, + "logps_train/chosen": -74.62173461914062, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -185.32318115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2121734619140625, + "rewards_train/margins": 5.520144939422607, + "rewards_train/rejected": -5.73231840133667, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -14.681233406066895, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -16.937633514404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1274983882904053, + "rewards_train/margins": 0.2178274393081665, + "rewards_train/rejected": -1.3453258275985718, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -37.868812561035156, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -35.10284423828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.649381399154663, + "rewards_train/margins": -1.2265969514846802, + "rewards_train/rejected": -1.422784447669983, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -18.70595932006836, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -19.30695343017578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.604970932006836, + "rewards_train/margins": -0.0617755651473999, + "rewards_train/rejected": -1.543195366859436, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -220.6575927734375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -223.05059814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.065759658813477, + "rewards_train/margins": 0.23930072784423828, + "rewards_train/rejected": -9.305060386657715, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -172.81903076171875, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -78.8428726196289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.081902980804443, + "rewards_train/margins": -2.522615671157837, + "rewards_train/rejected": -2.5592873096466064, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -55.154701232910156, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -49.534996032714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5279700756073, + "rewards_train/margins": -0.5119705200195312, + "rewards_train/rejected": -3.0159995555877686, + "step": 1447 + }, + { + "epoch": 0.4, + "logps_train/chosen": -4.255059242248535, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -4.2542009353637695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08644342422485352, + "rewards_train/margins": -8.58306884765625e-05, + "rewards_train/rejected": -0.08635759353637695, + "step": 1447 + }, + { + "epoch": 0.4, + "learning_rate": 8.217220091497018e-07, + "loss": 0.6655, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -52.28717803955078, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -43.81772994995117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.353717803955078, + "rewards_train/margins": -0.07819485664367676, + "rewards_train/rejected": -3.2755229473114014, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -126.271484375, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -183.9403076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.527148485183716, + "rewards_train/margins": 4.7668821811676025, + "rewards_train/rejected": -7.294030666351318, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -73.7451171875, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -93.71064758300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02451171912252903, + "rewards_train/margins": 0.8465530630201101, + "rewards_train/rejected": -0.8710647821426392, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -3.478640556335449, + "logps_train/ref_chosen": -0.71875, + "logps_train/ref_rejected": -0.71875, + "logps_train/rejected": -3.3189167976379395, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2759890556335449, + "rewards_train/margins": -0.015972375869750977, + "rewards_train/rejected": -0.26001667976379395, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -8.814396858215332, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -9.481912612915039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2814396917819977, + "rewards_train/margins": -0.17074842751026154, + "rewards_train/rejected": -0.11069126427173615, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -239.4940948486328, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -84.1690673828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.649409770965576, + "rewards_train/margins": -2.707503080368042, + "rewards_train/rejected": -2.941906690597534, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -12.153332710266113, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -16.78233528137207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49033328890800476, + "rewards_train/margins": 0.8519627153873444, + "rewards_train/rejected": -1.3422960042953491, + "step": 1448 + }, + { + "epoch": 0.4, + "logps_train/chosen": -23.14615249633789, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -47.49039840698242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7177402973175049, + "rewards_train/margins": 0.36879968643188477, + "rewards_train/rejected": -2.0865399837493896, + "step": 1448 + }, + { + "epoch": 0.41, + "logps_train/chosen": -107.17231750488281, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -121.72408294677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8172317743301392, + "rewards_train/margins": 0.2551765441894531, + "rewards_train/rejected": -1.0724083185195923, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -27.314006805419922, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -14.881444931030273, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2251508235931396, + "rewards_train/margins": -1.0010688304901123, + "rewards_train/rejected": -1.2240819931030273, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -80.59806060791016, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -86.77236938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45980605483055115, + "rewards_train/margins": 3.292431026697159, + "rewards_train/rejected": -3.75223708152771, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -27.851837158203125, + "logps_train/ref_chosen": -6.0, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -16.979040145874023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1851837635040283, + "rewards_train/margins": -1.027904748916626, + "rewards_train/rejected": -1.1572790145874023, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -1.5248063802719116, + "logps_train/ref_chosen": -0.8046875, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -6.323241710662842, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07201188802719116, + "rewards_train/margins": 0.360312283039093, + "rewards_train/rejected": -0.4323241710662842, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -205.66217041015625, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -258.1986083984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.316217422485352, + "rewards_train/margins": 5.903643608093262, + "rewards_train/rejected": -15.219861030578613, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -15.887073516845703, + "logps_train/ref_chosen": -6.90625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -50.77167892456055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8980823755264282, + "rewards_train/margins": 3.0728355646133423, + "rewards_train/rejected": -3.9709179401397705, + "step": 1449 + }, + { + "epoch": 0.41, + "logps_train/chosen": -0.5050987601280212, + "logps_train/ref_chosen": -0.84375, + "logps_train/ref_rejected": -0.84375, + "logps_train/rejected": -0.5013152360916138, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.033865123987197876, + "rewards_train/margins": -0.0003783516585826874, + "rewards_train/rejected": 0.03424347564578056, + "step": 1449 + }, + { + "epoch": 0.41, + "learning_rate": 8.191194656678904e-07, + "loss": 0.6741, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -112.0656967163086, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -208.79330444335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.806569814682007, + "rewards_train/margins": 4.772760629653931, + "rewards_train/rejected": -8.579330444335938, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -133.63681030273438, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -128.99197387695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8636810183525085, + "rewards_train/margins": 1.485516369342804, + "rewards_train/rejected": -2.3491973876953125, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -92.76985931396484, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -109.85012817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1769859790802, + "rewards_train/margins": 1.0080268383026123, + "rewards_train/rejected": -4.1850128173828125, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -15.450759887695312, + "logps_train/ref_chosen": -1.8671875, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -36.01027297973633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.358357310295105, + "rewards_train/margins": 0.8614200353622437, + "rewards_train/rejected": -2.2197773456573486, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -32.824462890625, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -46.386512756347656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5761964321136475, + "rewards_train/margins": -0.1875450611114502, + "rewards_train/rejected": -2.3886513710021973, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -5.55304479598999, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -16.113595962524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030304480344057083, + "rewards_train/margins": 0.4810551516711712, + "rewards_train/rejected": -0.5113596320152283, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -67.9686279296875, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -23.644121170043945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4468629360198975, + "rewards_train/margins": -1.132450819015503, + "rewards_train/rejected": -1.3144121170043945, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -200.68115234375, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -205.126220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.368115186691284, + "rewards_train/margins": 3.6445071697235107, + "rewards_train/rejected": -6.012622356414795, + "step": 1450 + }, + { + "epoch": 0.41, + "logps_train/chosen": -24.986276626586914, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -48.20587158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7111276388168335, + "rewards_train/margins": 0.009459495544433594, + "rewards_train/rejected": -1.720587134361267, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -15.850116729736328, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -32.440574645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16498832404613495, + "rewards_train/margins": 2.396545931696892, + "rewards_train/rejected": -2.231557607650757, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -128.57456970214844, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -182.53488159179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.607456922531128, + "rewards_train/margins": 4.696031332015991, + "rewards_train/rejected": -7.303488254547119, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -25.801258087158203, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -38.58348846435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.030125856399536, + "rewards_train/margins": 0.9282231330871582, + "rewards_train/rejected": -2.9583489894866943, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -59.58075714111328, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -69.14349365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.083075761795044, + "rewards_train/margins": 0.13127374649047852, + "rewards_train/rejected": -2.2143495082855225, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -44.30831527709961, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -61.51329803466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5558315515518188, + "rewards_train/margins": 1.5829983949661255, + "rewards_train/rejected": -3.1388299465179443, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -133.90936279296875, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -151.1016387939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.090936303138733, + "rewards_train/margins": 1.1192275285720825, + "rewards_train/rejected": -2.2101638317108154, + "step": 1451 + }, + { + "epoch": 0.41, + "logps_train/chosen": -12.325695037841797, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -25.34865951538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9247570037841797, + "rewards_train/margins": 0.8788589239120483, + "rewards_train/rejected": -1.803615927696228, + "step": 1451 + }, + { + "epoch": 0.41, + "learning_rate": 8.165181881580418e-07, + "loss": 0.3844, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -33.45682144165039, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -45.90953826904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5581821799278259, + "rewards_train/margins": 2.5890217423439026, + "rewards_train/rejected": -3.1472039222717285, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -143.30072021484375, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -170.854248046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.980072021484375, + "rewards_train/margins": 2.65535306930542, + "rewards_train/rejected": -4.635425090789795, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -130.885986328125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -210.6557159423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6885986328125, + "rewards_train/margins": 3.8769731521606445, + "rewards_train/rejected": -5.5655717849731445, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -54.461551666259766, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -42.97226333618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14615516364574432, + "rewards_train/margins": 2.0635711699724197, + "rewards_train/rejected": -2.209726333618164, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -280.2566223144531, + "logps_train/ref_chosen": -204.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -321.7074890136719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.625662326812744, + "rewards_train/margins": 6.145086765289307, + "rewards_train/rejected": -13.77074909210205, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.517292022705078, + "logps_train/ref_chosen": -1.6875, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -10.557409286499023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8829792141914368, + "rewards_train/margins": 0.0008867383003234863, + "rewards_train/rejected": -0.8838659524917603, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -185.3694305419922, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -185.94320678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.736943006515503, + "rewards_train/margins": 0.8573777675628662, + "rewards_train/rejected": -4.594320774078369, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -134.79705810546875, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -161.89724731445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0297058820724487, + "rewards_train/margins": 2.7600189447402954, + "rewards_train/rejected": -3.789724826812744, + "step": 1452 + }, + { + "epoch": 0.41, + "logps_train/chosen": -29.308422088623047, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -23.58949089050293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7370922565460205, + "rewards_train/margins": 0.04685688018798828, + "rewards_train/rejected": -1.7839491367340088, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -26.651416778564453, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -64.57884216308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7026416659355164, + "rewards_train/margins": 0.3552425503730774, + "rewards_train/rejected": -1.0578842163085938, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -3.444819927215576, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -10.088642120361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09614300727844238, + "rewards_train/margins": 0.2487572282552719, + "rewards_train/rejected": -0.15261422097682953, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -149.22142028808594, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -139.33013916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9221420288085938, + "rewards_train/margins": 0.06087195873260498, + "rewards_train/rejected": -1.9830139875411987, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -61.57387161254883, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -86.83905029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9323871731758118, + "rewards_train/margins": 0.0015178918838500977, + "rewards_train/rejected": -0.9339050650596619, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.721869945526123, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -17.34487533569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1159369945526123, + "rewards_train/margins": 0.9935505390167236, + "rewards_train/rejected": -1.109487533569336, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -22.15923500061035, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -30.57259750366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2784235179424286, + "rewards_train/margins": 1.441336303949356, + "rewards_train/rejected": -1.7197598218917847, + "step": 1453 + }, + { + "epoch": 0.41, + "logps_train/chosen": -62.77241897583008, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -38.41322326660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42724189162254333, + "rewards_train/margins": 3.0937680304050446, + "rewards_train/rejected": -3.521009922027588, + "step": 1453 + }, + { + "epoch": 0.41, + "learning_rate": 8.139181948263414e-07, + "loss": 0.3189, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -129.75439453125, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -250.1868896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.275439739227295, + "rewards_train/margins": 7.043249607086182, + "rewards_train/rejected": -11.318689346313477, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -31.762765884399414, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -37.265140533447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9637765884399414, + "rewards_train/margins": 0.16898751258850098, + "rewards_train/rejected": -2.1327641010284424, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -14.518230438232422, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -30.085723876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0830730199813843, + "rewards_train/margins": 0.4692493677139282, + "rewards_train/rejected": -1.5523223876953125, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -115.95988464355469, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -141.62994384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14598846435546875, + "rewards_train/margins": 3.7670059204101562, + "rewards_train/rejected": -3.912994384765625, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -167.30551147460938, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -309.1986389160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5305511951446533, + "rewards_train/margins": 12.68931269645691, + "rewards_train/rejected": -14.219863891601562, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -78.61083984375, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -35.87087631225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9610840082168579, + "rewards_train/margins": 1.9416285753250122, + "rewards_train/rejected": -2.90271258354187, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -29.759647369384766, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -23.71900177001953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8009647130966187, + "rewards_train/margins": -0.2665644884109497, + "rewards_train/rejected": -1.534400224685669, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.701337814331055, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -21.227893829345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21075879037380219, + "rewards_train/margins": 0.09953059256076813, + "rewards_train/rejected": -0.3102893829345703, + "step": 1454 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.641180992126465, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -12.875664710998535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.354743093252182, + "rewards_train/margins": 0.4765733778476715, + "rewards_train/rejected": -0.8313164710998535, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -3.8819713592529297, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -4.770328998565674, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07100963592529297, + "rewards_train/margins": -0.09085173718631268, + "rewards_train/rejected": 0.019842101261019707, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -35.86477279663086, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -24.15521240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7864773273468018, + "rewards_train/margins": -0.8272060751914978, + "rewards_train/rejected": -0.959271252155304, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -17.811506271362305, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -48.96464157104492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6749006509780884, + "rewards_train/margins": 2.9028135538101196, + "rewards_train/rejected": -3.577714204788208, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -86.77668762207031, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -153.15219116210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9276687502861023, + "rewards_train/margins": 2.837550461292267, + "rewards_train/rejected": -3.765219211578369, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -16.659467697143555, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -56.840518951416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2190717458724976, + "rewards_train/margins": 2.189980149269104, + "rewards_train/rejected": -3.4090518951416016, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -79.20904541015625, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -99.70454406738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.270904541015625, + "rewards_train/margins": 0.6995498538017273, + "rewards_train/rejected": -0.9704543948173523, + "step": 1455 + }, + { + "epoch": 0.41, + "logps_train/chosen": -133.3321990966797, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -92.47291564941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8332200050354004, + "rewards_train/margins": 0.0640716552734375, + "rewards_train/rejected": -2.897291660308838, + "step": 1455 + }, + { + "epoch": 0.41, + "learning_rate": 8.113195038699859e-07, + "loss": 0.4017, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -2.6930243968963623, + "logps_train/ref_chosen": -0.60546875, + "logps_train/ref_rejected": -0.60546875, + "logps_train/rejected": -2.739142417907715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20875556766986847, + "rewards_train/margins": 0.004611805081367493, + "rewards_train/rejected": -0.21336737275123596, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -13.54445743560791, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -19.7576847076416, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.85444575548172, + "rewards_train/margins": -0.40992727875709534, + "rewards_train/rejected": -0.44451847672462463, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -25.506488800048828, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -38.04538345336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6881489157676697, + "rewards_train/margins": 1.0038895010948181, + "rewards_train/rejected": -1.6920384168624878, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -102.90379333496094, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -134.26425170898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1903793811798096, + "rewards_train/margins": 1.286045789718628, + "rewards_train/rejected": -4.4764251708984375, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -100.29071044921875, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -178.23280334472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.379070997238159, + "rewards_train/margins": 4.844209432601929, + "rewards_train/rejected": -7.223280429840088, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -221.5504913330078, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -244.0, + "logps_train/rejected": -310.14849853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.755049228668213, + "rewards_train/margins": 2.8598008155822754, + "rewards_train/rejected": -6.614850044250488, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -55.269371032714844, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -56.080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45193710923194885, + "rewards_train/margins": 2.906070798635483, + "rewards_train/rejected": -3.3580079078674316, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -2.2031166553497314, + "logps_train/ref_chosen": -1.9140625, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -1.7018910646438599, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.028905415907502174, + "rewards_train/margins": -0.06652881018817425, + "rewards_train/rejected": 0.03762339428067207, + "step": 1456 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.262211799621582, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -18.900938034057617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5105962157249451, + "rewards_train/margins": 0.4857475757598877, + "rewards_train/rejected": -0.9963437914848328, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -213.95050048828125, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -227.0, + "logps_train/rejected": -357.1492614746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.595050096511841, + "rewards_train/margins": 10.419875860214233, + "rewards_train/rejected": -13.014925956726074, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -9.561689376831055, + "logps_train/ref_chosen": -1.0390625, + "logps_train/ref_rejected": -1.0390625, + "logps_train/rejected": -10.258915901184082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8522626757621765, + "rewards_train/margins": 0.06972265243530273, + "rewards_train/rejected": -0.9219853281974792, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -60.40496063232422, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -164.95059204101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2654961347579956, + "rewards_train/margins": 5.979563355445862, + "rewards_train/rejected": -7.245059490203857, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -9.610479354858398, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -44.69685363769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08270206302404404, + "rewards_train/margins": 4.177387617528439, + "rewards_train/rejected": -4.0946855545043945, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.679559707641602, + "logps_train/ref_chosen": -2.953125, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -24.440187454223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.772643506526947, + "rewards_train/margins": 0.8276252150535583, + "rewards_train/rejected": -1.6002687215805054, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -16.04180335998535, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -42.80483627319336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23543034493923187, + "rewards_train/margins": 2.9825533777475357, + "rewards_train/rejected": -3.2179837226867676, + "step": 1457 + }, + { + "epoch": 0.41, + "logps_train/chosen": -113.78155517578125, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -88.0523681640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7281556129455566, + "rewards_train/margins": -0.04791879653930664, + "rewards_train/rejected": -2.68023681640625, + "step": 1457 + }, + { + "epoch": 0.41, + "learning_rate": 8.087221334770566e-07, + "loss": 0.331, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -7.794646739959717, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -11.082512855529785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.08258967846632004, + "rewards_train/margins": -0.10558839328587055, + "rewards_train/rejected": 0.022998714819550514, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -22.950191497802734, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -38.89276885986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4575191736221313, + "rewards_train/margins": 1.9192577600479126, + "rewards_train/rejected": -3.376776933670044, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -135.90536499023438, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -218.85372924804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7405365705490112, + "rewards_train/margins": 5.744836449623108, + "rewards_train/rejected": -7.485373020172119, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -57.98286437988281, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -59.60301208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7982864379882812, + "rewards_train/margins": 0.18701481819152832, + "rewards_train/rejected": -1.9853012561798096, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -218.50949096679688, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -174.26332092285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.150949478149414, + "rewards_train/margins": -1.9246172904968262, + "rewards_train/rejected": -6.226332187652588, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -7.837218284606934, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -10.838309288024902, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3243468403816223, + "rewards_train/margins": 0.24385911226272583, + "rewards_train/rejected": -0.5682059526443481, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -7.444144248962402, + "logps_train/ref_chosen": -0.48828125, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -6.862247467041016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6955863237380981, + "rewards_train/margins": -0.7999865785241127, + "rewards_train/rejected": 0.10440025478601456, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -12.630695343017578, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -31.816471099853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0255695581436157, + "rewards_train/margins": 1.51545250415802, + "rewards_train/rejected": -2.5410220623016357, + "step": 1458 + }, + { + "epoch": 0.41, + "logps_train/chosen": -119.59632873535156, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -124.27879333496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.159633159637451, + "rewards_train/margins": 0.018246173858642578, + "rewards_train/rejected": -5.177879333496094, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -188.27670288085938, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -210.97767639160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0276702642440796, + "rewards_train/margins": 7.970097184181213, + "rewards_train/rejected": -8.997767448425293, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.035792827606201, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -1.5390625, + "logps_train/rejected": -5.227206707000732, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20045427978038788, + "rewards_train/margins": 0.1683601588010788, + "rewards_train/rejected": -0.3688144385814667, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -70.81649780273438, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -78.74032592773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3316497802734375, + "rewards_train/margins": 1.4923828840255737, + "rewards_train/rejected": -1.8240326642990112, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -86.49848175048828, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -57.39812469482422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7248482704162598, + "rewards_train/margins": -2.385035753250122, + "rewards_train/rejected": -1.3398125171661377, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -28.004301071166992, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -1.84375, + "logps_train/rejected": -25.89072608947754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3754301071166992, + "rewards_train/margins": 1.0292675495147705, + "rewards_train/rejected": -2.4046976566314697, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -109.42874145507812, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -167.5457763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7428741455078125, + "rewards_train/margins": 4.561703681945801, + "rewards_train/rejected": -5.304577827453613, + "step": 1459 + }, + { + "epoch": 0.41, + "logps_train/chosen": -85.8629150390625, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -168.8421173095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.63629150390625, + "rewards_train/margins": 6.797920227050781, + "rewards_train/rejected": -8.434211730957031, + "step": 1459 + }, + { + "epoch": 0.41, + "learning_rate": 8.061261018263918e-07, + "loss": 0.6119, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -66.47227478027344, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -73.46647644042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6222274899482727, + "rewards_train/margins": 1.4744202494621277, + "rewards_train/rejected": -2.0966477394104004, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -1.3355339765548706, + "logps_train/ref_chosen": -1.578125, + "logps_train/ref_rejected": -0.3203125, + "logps_train/rejected": -2.4148812294006348, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02425910346210003, + "rewards_train/margins": 0.23371597938239574, + "rewards_train/rejected": -0.20945687592029572, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -16.132579803466797, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -35.17095947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5382580161094666, + "rewards_train/margins": 0.7413379549980164, + "rewards_train/rejected": -1.279595971107483, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -3.777195930480957, + "logps_train/ref_chosen": -2.1875, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -10.739568710327148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15896959602832794, + "rewards_train/margins": 0.1399872750043869, + "rewards_train/rejected": -0.29895687103271484, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -99.83821105957031, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -98.54693603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4161789119243622, + "rewards_train/margins": 0.42087251553311944, + "rewards_train/rejected": -0.0046936036087572575, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -219.46673583984375, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -237.2955322265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.046673774719238, + "rewards_train/margins": -0.11712074279785156, + "rewards_train/rejected": -11.929553031921387, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -108.59034729003906, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -230.60992431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.309034824371338, + "rewards_train/margins": 8.651957988739014, + "rewards_train/rejected": -11.960992813110352, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.746304988861084, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -32.240074157714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15588049590587616, + "rewards_train/margins": 0.9931269437074661, + "rewards_train/rejected": -1.1490074396133423, + "step": 1460 + }, + { + "epoch": 0.41, + "logps_train/chosen": -24.495040893554688, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -44.935333251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7495040893554688, + "rewards_train/margins": 0.8815292119979858, + "rewards_train/rejected": -1.6310333013534546, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -236.7529296875, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -259.98089599609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.575293064117432, + "rewards_train/margins": -0.07720327377319336, + "rewards_train/rejected": -7.498089790344238, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -76.57646942138672, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -114.87374877929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.232646942138672, + "rewards_train/margins": 1.754728078842163, + "rewards_train/rejected": -3.987375020980835, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -34.67600631713867, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -30.486976623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0676006078720093, + "rewards_train/margins": 1.2623471021652222, + "rewards_train/rejected": -2.3299477100372314, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -89.59233856201172, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -112.16703033447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3092338740825653, + "rewards_train/margins": 2.8074691593647003, + "rewards_train/rejected": -3.1167030334472656, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -43.56696701049805, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -53.37139129638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06830330193042755, + "rewards_train/margins": 0.4304424375295639, + "rewards_train/rejected": -0.36213913559913635, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -227.46542358398438, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -217.78721618652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.446542263031006, + "rewards_train/margins": -0.36782073974609375, + "rewards_train/rejected": -5.078721523284912, + "step": 1461 + }, + { + "epoch": 0.41, + "logps_train/chosen": -106.0226058959961, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -208.93380737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.802260637283325, + "rewards_train/margins": 5.841120481491089, + "rewards_train/rejected": -8.643381118774414, + "step": 1461 + }, + { + "epoch": 0.41, + "learning_rate": 8.035314270874615e-07, + "loss": 0.3952, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -88.62590026855469, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -88.44503784179688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0625900030136108, + "rewards_train/margins": -0.01808619499206543, + "rewards_train/rejected": -1.0445038080215454, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -16.61315155029297, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -63.49925231933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4300651550292969, + "rewards_train/margins": 1.4198601245880127, + "rewards_train/rejected": -1.8499252796173096, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -120.7754898071289, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -74.74152374267578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.927549123764038, + "rewards_train/margins": -2.853396773338318, + "rewards_train/rejected": -1.0741523504257202, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -15.066656112670898, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -25.329626083374023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6691656112670898, + "rewards_train/margins": 1.4075469970703125, + "rewards_train/rejected": -2.0767126083374023, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -16.20816421508789, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -38.26462173461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.192691445350647, + "rewards_train/margins": 1.277520775794983, + "rewards_train/rejected": -2.47021222114563, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -14.400289535522461, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -62.01354217529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29002895951271057, + "rewards_train/margins": 4.567575544118881, + "rewards_train/rejected": -4.857604503631592, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -14.464521408081055, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -21.020347595214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37770214676856995, + "rewards_train/margins": 0.5493326485157013, + "rewards_train/rejected": -0.9270347952842712, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -11.230205535888672, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -13.42146110534668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17302055656909943, + "rewards_train/margins": 0.7472505420446396, + "rewards_train/rejected": -0.920271098613739, + "step": 1462 + }, + { + "epoch": 0.41, + "logps_train/chosen": -163.7235107421875, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -210.19943237304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.672351062297821, + "rewards_train/margins": 5.047592461109161, + "rewards_train/rejected": -5.719943523406982, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -0.29737329483032227, + "logps_train/ref_chosen": -0.91015625, + "logps_train/ref_rejected": -2.5625, + "logps_train/rejected": -6.09541130065918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.061278294771909714, + "rewards_train/margins": 0.4145694188773632, + "rewards_train/rejected": -0.3532911241054535, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -24.157520294189453, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -34.52915573120117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3157520294189453, + "rewards_train/margins": 2.537163496017456, + "rewards_train/rejected": -2.8529155254364014, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -119.2014389038086, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -111.17794799804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9201438426971436, + "rewards_train/margins": 2.0726511478424072, + "rewards_train/rejected": -4.992794990539551, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -19.096895217895508, + "logps_train/ref_chosen": -0.86328125, + "logps_train/ref_rejected": -1.96875, + "logps_train/rejected": -23.527311325073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8233613967895508, + "rewards_train/margins": 0.33249473571777344, + "rewards_train/rejected": -2.155856132507324, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -81.098388671875, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -123.99230194091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6598389148712158, + "rewards_train/margins": 0.13939130306243896, + "rewards_train/rejected": -1.7992302179336548, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -66.12102508544922, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -84.74200439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012102508917450905, + "rewards_train/margins": 0.2620979305356741, + "rewards_train/rejected": -0.274200439453125, + "step": 1463 + }, + { + "epoch": 0.41, + "logps_train/chosen": -177.00811767578125, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -237.5160675048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.050811767578125, + "rewards_train/margins": 4.000795364379883, + "rewards_train/rejected": -9.051607131958008, + "step": 1463 + }, + { + "epoch": 0.41, + "learning_rate": 8.009381274202378e-07, + "loss": 0.4757, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -154.7865447998047, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -195.84317016601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.378654479980469, + "rewards_train/margins": 0.10566282272338867, + "rewards_train/rejected": -4.484317302703857, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -290.33123779296875, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -261.81536865234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.633124351501465, + "rewards_train/margins": -0.9515876770019531, + "rewards_train/rejected": -10.681536674499512, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -150.71002197265625, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -217.67160034179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.921002149581909, + "rewards_train/margins": 6.046158075332642, + "rewards_train/rejected": -8.96716022491455, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -118.77762603759766, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -128.29544067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5777626037597656, + "rewards_train/margins": 1.5017814636230469, + "rewards_train/rejected": -2.0795440673828125, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -115.68484497070312, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -187.519775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.268484503030777, + "rewards_train/margins": 5.083493322134018, + "rewards_train/rejected": -5.351977825164795, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -25.201210021972656, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -41.249000549316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3513710498809814, + "rewards_train/margins": 0.41102898120880127, + "rewards_train/rejected": -1.7624000310897827, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -131.07476806640625, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -187.00048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1074769496917725, + "rewards_train/margins": 4.092571973800659, + "rewards_train/rejected": -6.200048923492432, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -3.0643162727355957, + "logps_train/ref_chosen": -1.9296875, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -27.543560028076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11346288025379181, + "rewards_train/margins": 1.9283931702375412, + "rewards_train/rejected": -2.041856050491333, + "step": 1464 + }, + { + "epoch": 0.41, + "logps_train/chosen": -15.189373970031738, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -53.013916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28768739104270935, + "rewards_train/margins": 4.113704115152359, + "rewards_train/rejected": -4.401391506195068, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -126.81398010253906, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -192.02960205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.731398105621338, + "rewards_train/margins": 5.471561908721924, + "rewards_train/rejected": -9.202960014343262, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -30.892305374145508, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -33.233741760253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5517305731773376, + "rewards_train/margins": 0.9841436743736267, + "rewards_train/rejected": -1.5358742475509644, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -86.36743927001953, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -66.98038482666016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.63674396276474, + "rewards_train/margins": -0.9387054741382599, + "rewards_train/rejected": 0.3019615113735199, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -33.26825714111328, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -33.193870544433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.676825761795044, + "rewards_train/margins": 0.7925612926483154, + "rewards_train/rejected": -2.4693870544433594, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -117.77984619140625, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -205.85507202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1779847145080566, + "rewards_train/margins": 5.307522773742676, + "rewards_train/rejected": -7.485507488250732, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -108.18075561523438, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -105.97459411621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6180756092071533, + "rewards_train/margins": 0.12938380241394043, + "rewards_train/rejected": -1.7474594116210938, + "step": 1465 + }, + { + "epoch": 0.41, + "logps_train/chosen": -137.08905029296875, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -149.53643798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.358905076980591, + "rewards_train/margins": 0.8947386741638184, + "rewards_train/rejected": -3.253643751144409, + "step": 1465 + }, + { + "epoch": 0.41, + "learning_rate": 7.983462209750684e-07, + "loss": 0.3593, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -53.78434753417969, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -87.56558990478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3784347474575043, + "rewards_train/margins": 0.9781242907047272, + "rewards_train/rejected": -1.3565590381622314, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -30.8939208984375, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -0.9765625, + "logps_train/rejected": -20.99507713317871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23560790717601776, + "rewards_train/margins": 2.2374594658613205, + "rewards_train/rejected": -2.0018515586853027, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -93.17257690429688, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -158.0914764404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3172577619552612, + "rewards_train/margins": 3.8918901681900024, + "rewards_train/rejected": -5.209147930145264, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -100.15106201171875, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -114.01869201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.515106201171875, + "rewards_train/margins": 2.436763048171997, + "rewards_train/rejected": -3.951869249343872, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -165.864990234375, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -158.25360107421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.686499118804932, + "rewards_train/margins": -0.41113901138305664, + "rewards_train/rejected": -5.275360107421875, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -209.1575469970703, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -164.55715942382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.515754699707031, + "rewards_train/margins": -0.7600388526916504, + "rewards_train/rejected": -5.755715847015381, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -38.77011489868164, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -41.076480865478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32701149582862854, + "rewards_train/margins": 3.224386543035507, + "rewards_train/rejected": -3.5513980388641357, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.456510543823242, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -9.640501976013184, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02309894561767578, + "rewards_train/margins": 0.4496491551399231, + "rewards_train/rejected": -0.4265502095222473, + "step": 1466 + }, + { + "epoch": 0.41, + "logps_train/chosen": -37.35853958129883, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -92.35313415527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8108539581298828, + "rewards_train/margins": 0.37445950508117676, + "rewards_train/rejected": -1.1853134632110596, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -33.27812194824219, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -40.846214294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.396562337875366, + "rewards_train/margins": 0.638059139251709, + "rewards_train/rejected": -3.034621477127075, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -64.7840576171875, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -122.79257202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.67840576171875, + "rewards_train/margins": 1.1508514881134033, + "rewards_train/rejected": -1.8292572498321533, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -84.80632781982422, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -188.93136596679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7306327819824219, + "rewards_train/margins": 3.7625041007995605, + "rewards_train/rejected": -4.493136882781982, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -161.865478515625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -217.2462158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.786548137664795, + "rewards_train/margins": 3.538074016571045, + "rewards_train/rejected": -10.32462215423584, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -257.4219970703125, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -214.31985473632812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.44219970703125, + "rewards_train/margins": -1.5602140426635742, + "rewards_train/rejected": -10.881985664367676, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -181.34188842773438, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -215.7992706298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7341887950897217, + "rewards_train/margins": 1.9457385540008545, + "rewards_train/rejected": -4.679927349090576, + "step": 1467 + }, + { + "epoch": 0.41, + "logps_train/chosen": -204.94003295898438, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -232.61468505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.694003582000732, + "rewards_train/margins": 1.9674649238586426, + "rewards_train/rejected": -6.661468505859375, + "step": 1467 + }, + { + "epoch": 0.41, + "learning_rate": 7.957557258925504e-07, + "loss": 0.4006, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -93.38970947265625, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -93.87139892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.138970971107483, + "rewards_train/margins": 0.04816889762878418, + "rewards_train/rejected": -1.187139868736267, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -34.70115661621094, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -36.6803092956543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2201156616210938, + "rewards_train/margins": 1.9604153633117676, + "rewards_train/rejected": -3.1805310249328613, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -0.8534743785858154, + "logps_train/ref_chosen": -0.7578125, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -4.553638458251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009566187858581543, + "rewards_train/margins": 0.22704766690731049, + "rewards_train/rejected": -0.23661385476589203, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -32.617774963378906, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -30.232730865478516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.211777448654175, + "rewards_train/margins": -0.22600436210632324, + "rewards_train/rejected": -1.9857730865478516, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -106.23628997802734, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -256.9619445800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.073629140853882, + "rewards_train/margins": 10.772565126419067, + "rewards_train/rejected": -13.84619426727295, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -190.14601135253906, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -235.67807006835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.314600944519043, + "rewards_train/margins": -2.3467936515808105, + "rewards_train/rejected": -6.967807292938232, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -1.9168133735656738, + "logps_train/ref_chosen": -0.875, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -10.250661849975586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10418134182691574, + "rewards_train/margins": 0.5646348670125008, + "rewards_train/rejected": -0.6688162088394165, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -155.81045532226562, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -165.02777099609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.231045722961426, + "rewards_train/margins": -0.6282687187194824, + "rewards_train/rejected": -4.602777004241943, + "step": 1468 + }, + { + "epoch": 0.41, + "logps_train/chosen": -221.32286071777344, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -210.67428588867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.932286262512207, + "rewards_train/margins": -1.3648576736450195, + "rewards_train/rejected": -4.5674285888671875, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.326511383056641, + "logps_train/ref_chosen": -1.7109375, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -23.49570655822754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46155738830566406, + "rewards_train/margins": 1.4692633152008057, + "rewards_train/rejected": -1.9308207035064697, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -203.369384765625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -215.18869018554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.2369384765625, + "rewards_train/margins": 2.781930923461914, + "rewards_train/rejected": -10.018869400024414, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -152.69961547851562, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -228.11109924316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.66996169090271, + "rewards_train/margins": 0.2411482334136963, + "rewards_train/rejected": -2.9111099243164062, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -123.57991027832031, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -155.49905395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.1079912185668945, + "rewards_train/margins": 2.291914463043213, + "rewards_train/rejected": -6.399905681610107, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -88.62112426757812, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -140.31402587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16211242973804474, + "rewards_train/margins": 2.9692903012037277, + "rewards_train/rejected": -3.1314027309417725, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -67.15374755859375, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -72.80506896972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.165374755859375, + "rewards_train/margins": 2.765132188796997, + "rewards_train/rejected": -3.930506944656372, + "step": 1469 + }, + { + "epoch": 0.41, + "logps_train/chosen": -14.826719284057617, + "logps_train/ref_chosen": -1.8125, + "logps_train/ref_rejected": -1.890625, + "logps_train/rejected": -12.188623428344727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3014219999313354, + "rewards_train/margins": -0.2716221809387207, + "rewards_train/rejected": -1.0297998189926147, + "step": 1469 + }, + { + "epoch": 0.41, + "learning_rate": 7.931666603034032e-07, + "loss": 0.6018, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -89.7548828125, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -192.4086456298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.675488293170929, + "rewards_train/margins": 5.165376365184784, + "rewards_train/rejected": -5.840864658355713, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -28.138751983642578, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -30.58932113647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0388752222061157, + "rewards_train/margins": 0.9638069868087769, + "rewards_train/rejected": -2.0026822090148926, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -53.60021209716797, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -17.653396606445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5850211977958679, + "rewards_train/margins": 0.49281853437423706, + "rewards_train/rejected": -1.077839732170105, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -19.912628173828125, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -45.417110443115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2162628173828125, + "rewards_train/margins": 1.3379483222961426, + "rewards_train/rejected": -2.554211139678955, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -21.2902889251709, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -32.91014099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3321539163589478, + "rewards_train/margins": 0.308860182762146, + "rewards_train/rejected": -1.6410140991210938, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -1.9641740322113037, + "logps_train/ref_chosen": -1.5859375, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -3.3591089248657227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03782365471124649, + "rewards_train/margins": 0.09964973479509354, + "rewards_train/rejected": -0.13747338950634003, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -55.99061584472656, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -49.90791320800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3740615844726562, + "rewards_train/margins": 1.1542298793792725, + "rewards_train/rejected": -2.5282914638519287, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -55.417240142822266, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -55.179141998291016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.954224109649658, + "rewards_train/margins": -0.3863098621368408, + "rewards_train/rejected": -2.5679142475128174, + "step": 1470 + }, + { + "epoch": 0.41, + "logps_train/chosen": -14.726346969604492, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -35.881805419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.169509768486023, + "rewards_train/margins": 0.893670916557312, + "rewards_train/rejected": -2.063180685043335, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -13.98177719116211, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -37.421669006347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4106777310371399, + "rewards_train/margins": 2.3814892172813416, + "rewards_train/rejected": -2.7921669483184814, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -127.10053253173828, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -214.9466552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4600532054901123, + "rewards_train/margins": 6.2846128940582275, + "rewards_train/rejected": -8.74466609954834, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -3.674973726272583, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -3.4528279304504395, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17374737560749054, + "rewards_train/margins": -0.1253395825624466, + "rewards_train/rejected": -0.048407793045043945, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -65.06983184814453, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -15.945745468139648, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.231983184814453, + "rewards_train/margins": -2.0342836380004883, + "rewards_train/rejected": -1.1976995468139648, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -94.6878890991211, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -114.29241943359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2187888622283936, + "rewards_train/margins": -0.38954687118530273, + "rewards_train/rejected": -2.829241991043091, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -167.53106689453125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -175.351806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.503106594085693, + "rewards_train/margins": 1.0820741653442383, + "rewards_train/rejected": -6.585180759429932, + "step": 1471 + }, + { + "epoch": 0.41, + "logps_train/chosen": -144.1376190185547, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -130.01747131347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0637619495391846, + "rewards_train/margins": 1.78798508644104, + "rewards_train/rejected": -4.851747035980225, + "step": 1471 + }, + { + "epoch": 0.41, + "learning_rate": 7.905790423283408e-07, + "loss": 0.507, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -224.4053955078125, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -166.82455444335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.840539455413818, + "rewards_train/margins": -0.7080841064453125, + "rewards_train/rejected": -5.132455348968506, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -108.08110046386719, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -96.48916625976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4581100940704346, + "rewards_train/margins": 2.340806484222412, + "rewards_train/rejected": -3.7989165782928467, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -111.14287567138672, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -165.07461547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7642875909805298, + "rewards_train/margins": 3.343173861503601, + "rewards_train/rejected": -4.107461452484131, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -12.857857704162598, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -43.828147888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8732857704162598, + "rewards_train/margins": 3.050153970718384, + "rewards_train/rejected": -3.9234397411346436, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -17.070985794067383, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -54.37610626220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4805361032485962, + "rewards_train/margins": 2.5008245706558228, + "rewards_train/rejected": -3.981360673904419, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -116.09835815429688, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -103.24681091308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.509835958480835, + "rewards_train/margins": 0.014845132827758789, + "rewards_train/rejected": -2.5246810913085938, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -150.73919677734375, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -164.47634887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.373919725418091, + "rewards_train/margins": 3.3737151622772217, + "rewards_train/rejected": -5.7476348876953125, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -86.79202270507812, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -79.25794982910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.07920241355896, + "rewards_train/margins": 1.5215928554534912, + "rewards_train/rejected": -4.600795269012451, + "step": 1472 + }, + { + "epoch": 0.41, + "logps_train/chosen": -52.83037567138672, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -89.32284545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.820537567138672, + "rewards_train/margins": 2.8117470741271973, + "rewards_train/rejected": -5.632284641265869, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -184.505615234375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -190.09556579589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.850561618804932, + "rewards_train/margins": -0.49100494384765625, + "rewards_train/rejected": -6.359556674957275, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -209.65420532226562, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -230.4294891357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.065421104431152, + "rewards_train/margins": 0.5775279998779297, + "rewards_train/rejected": -9.642949104309082, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -120.93975067138672, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -263.281005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9439752101898193, + "rewards_train/margins": 9.38412594795227, + "rewards_train/rejected": -12.32810115814209, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -167.67120361328125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -111.44050598144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.467120409011841, + "rewards_train/margins": 0.7269301414489746, + "rewards_train/rejected": -3.1940505504608154, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -8.96055793762207, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -7.271910667419434, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.564805805683136, + "rewards_train/margins": -0.0024584531784057617, + "rewards_train/rejected": -0.5623473525047302, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -151.18276977539062, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -238.63320922851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8182770013809204, + "rewards_train/margins": 5.645044207572937, + "rewards_train/rejected": -7.463321208953857, + "step": 1473 + }, + { + "epoch": 0.41, + "logps_train/chosen": -11.387924194335938, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -21.40675926208496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.35495758056640625, + "rewards_train/margins": 1.3331335186958313, + "rewards_train/rejected": -0.978175938129425, + "step": 1473 + }, + { + "epoch": 0.41, + "learning_rate": 7.879928900779455e-07, + "loss": 0.3173, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -60.128387451171875, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -119.4527587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.462838739156723, + "rewards_train/margins": 5.18243733048439, + "rewards_train/rejected": -5.645276069641113, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -15.876203536987305, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -25.857830047607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48137035965919495, + "rewards_train/margins": 1.279412716627121, + "rewards_train/rejected": -1.760783076286316, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -26.565528869628906, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -31.48206329345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5190529227256775, + "rewards_train/margins": 2.0697785019874573, + "rewards_train/rejected": -2.5888314247131348, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -112.47621154785156, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -200.5961151123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6476211547851562, + "rewards_train/margins": 3.4119906425476074, + "rewards_train/rejected": -7.059611797332764, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -89.03369903564453, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -209.997314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2966301143169403, + "rewards_train/margins": 5.39636155962944, + "rewards_train/rejected": -5.0997314453125, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -143.65283203125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -205.11199951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.165283203125, + "rewards_train/margins": 6.145916938781738, + "rewards_train/rejected": -7.311200141906738, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -105.4787826538086, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -47.702423095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.847878336906433, + "rewards_train/margins": 0.659864068031311, + "rewards_train/rejected": -2.507742404937744, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -111.491455078125, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -214.03392028808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.449145555496216, + "rewards_train/margins": 4.654247045516968, + "rewards_train/rejected": -8.103392601013184, + "step": 1474 + }, + { + "epoch": 0.41, + "logps_train/chosen": -81.61727142333984, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -98.08134460449219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1117271184921265, + "rewards_train/margins": -0.053592681884765625, + "rewards_train/rejected": -1.0581344366073608, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -90.46881103515625, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -151.33978271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2531189024448395, + "rewards_train/margins": 3.587097316980362, + "rewards_train/rejected": -3.3339784145355225, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -34.41304016113281, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -41.67924880981445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7725541591644287, + "rewards_train/margins": 0.014120817184448242, + "rewards_train/rejected": -2.786674976348877, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -96.01840209960938, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -236.80514526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7518402338027954, + "rewards_train/margins": 7.028674483299255, + "rewards_train/rejected": -7.780514717102051, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -47.040367126464844, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -162.8306884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3415367603302, + "rewards_train/margins": 2.74153208732605, + "rewards_train/rejected": -5.08306884765625, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -2.822584629058838, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -17.315528869628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04475846514105797, + "rewards_train/margins": 0.8555444218218327, + "rewards_train/rejected": -0.9003028869628906, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -88.59770202636719, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -49.94907760620117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7097702026367188, + "rewards_train/margins": 2.547637701034546, + "rewards_train/rejected": -3.2574079036712646, + "step": 1475 + }, + { + "epoch": 0.41, + "logps_train/chosen": -56.46796417236328, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -88.48188781738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12179642170667648, + "rewards_train/margins": 0.8763923719525337, + "rewards_train/rejected": -0.9981887936592102, + "step": 1475 + }, + { + "epoch": 0.41, + "learning_rate": 7.854082216525407e-07, + "loss": 0.1942, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -8.18824577331543, + "logps_train/ref_chosen": -3.921875, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -9.872518539428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42663708329200745, + "rewards_train/margins": 0.30280229449272156, + "rewards_train/rejected": -0.729439377784729, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -24.733768463134766, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -40.30208206176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5733768343925476, + "rewards_train/margins": 1.306831419467926, + "rewards_train/rejected": -1.8802082538604736, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -125.92401123046875, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -167.53390502929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.792401075363159, + "rewards_train/margins": 3.8109896183013916, + "rewards_train/rejected": -6.603390693664551, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -5.710224628448486, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -1.125, + "logps_train/rejected": -0.6461429595947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20539747178554535, + "rewards_train/margins": -0.25328317657113075, + "rewards_train/rejected": 0.0478857047855854, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -24.622394561767578, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -13.990201950073242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9028644561767578, + "rewards_train/margins": -0.6686879396438599, + "rewards_train/rejected": -1.234176516532898, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -40.617347717285156, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -95.0615463256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4617347717285156, + "rewards_train/margins": 1.4444198608398438, + "rewards_train/rejected": -1.9061546325683594, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -34.02113723754883, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -62.23387908935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5271137356758118, + "rewards_train/margins": 2.7337741255760193, + "rewards_train/rejected": -3.260887861251831, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -149.29446411132812, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -132.517333984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.579446315765381, + "rewards_train/margins": -2.027712821960449, + "rewards_train/rejected": -2.5517334938049316, + "step": 1476 + }, + { + "epoch": 0.41, + "logps_train/chosen": -102.96817779541016, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -146.04794311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1968178749084473, + "rewards_train/margins": 1.1579763889312744, + "rewards_train/rejected": -3.3547942638397217, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.058456420898438, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -32.681419372558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03084564208984375, + "rewards_train/margins": 2.418546438217163, + "rewards_train/rejected": -2.449392080307007, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -185.01519775390625, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -249.0, + "logps_train/rejected": -387.73858642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2015198469161987, + "rewards_train/margins": 12.672338604927063, + "rewards_train/rejected": -13.873858451843262, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -105.27522277832031, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -168.90057373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9775222539901733, + "rewards_train/margins": 4.562535405158997, + "rewards_train/rejected": -6.54005765914917, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -81.50715637207031, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -123.54418182373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5507156252861023, + "rewards_train/margins": 1.1037026047706604, + "rewards_train/rejected": -1.6544182300567627, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -13.9561767578125, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -13.302446365356445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.958117663860321, + "rewards_train/margins": -0.3778730034828186, + "rewards_train/rejected": -0.5802446603775024, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -54.545021057128906, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -97.833251953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.2704978883266449, + "rewards_train/margins": -0.1961769163608551, + "rewards_train/rejected": 0.4666748046875, + "step": 1477 + }, + { + "epoch": 0.41, + "logps_train/chosen": -19.82151985168457, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -67.36077117919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9009019732475281, + "rewards_train/margins": 2.085175096988678, + "rewards_train/rejected": -2.986077070236206, + "step": 1477 + }, + { + "epoch": 0.41, + "learning_rate": 7.828250551420656e-07, + "loss": 0.4763, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -37.4150390625, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -42.02766418457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.772753953933716, + "rewards_train/margins": 0.7362625598907471, + "rewards_train/rejected": -3.509016513824463, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -124.02313995361328, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -180.2225341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7523139715194702, + "rewards_train/margins": 4.069939732551575, + "rewards_train/rejected": -5.822253704071045, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -114.29208374023438, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -133.90122985839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4792083501815796, + "rewards_train/margins": 2.8609148263931274, + "rewards_train/rejected": -4.340123176574707, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -21.872175216674805, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -41.85760498046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2184675931930542, + "rewards_train/margins": 1.6922930479049683, + "rewards_train/rejected": -2.9107606410980225, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -86.10966491699219, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -128.48741149902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7609664797782898, + "rewards_train/margins": 1.3377748131752014, + "rewards_train/rejected": -2.098741292953491, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -17.762004852294922, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -71.42576599121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8699504733085632, + "rewards_train/margins": 2.1726260781288147, + "rewards_train/rejected": -3.042576551437378, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -97.63014221191406, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -126.70419311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6130142211914062, + "rewards_train/margins": 4.107405185699463, + "rewards_train/rejected": -4.720419406890869, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -19.842178344726562, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -28.260257720947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6904678344726562, + "rewards_train/margins": 0.06680798530578613, + "rewards_train/rejected": -1.7572758197784424, + "step": 1478 + }, + { + "epoch": 0.41, + "logps_train/chosen": -105.14776611328125, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -71.469970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.614776611328125, + "rewards_train/margins": 1.0072205066680908, + "rewards_train/rejected": -1.6219971179962158, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -75.75093078613281, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -76.05027770996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1750930547714233, + "rewards_train/margins": -0.07006525993347168, + "rewards_train/rejected": -1.1050277948379517, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -12.942681312561035, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -16.502321243286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0208306312561035, + "rewards_train/margins": 0.25283896923065186, + "rewards_train/rejected": -1.2736696004867554, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -286.7427673339844, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -237.69961547851562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.174277305603027, + "rewards_train/margins": -1.204315185546875, + "rewards_train/rejected": -8.969962120056152, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -5.140326499938965, + "logps_train/ref_chosen": -0.87890625, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -18.561168670654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42614203691482544, + "rewards_train/margins": 1.122162401676178, + "rewards_train/rejected": -1.5483044385910034, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -11.12471866607666, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -16.127777099609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5530968904495239, + "rewards_train/margins": -1.1153191924095154, + "rewards_train/rejected": 0.5622223019599915, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -21.540224075317383, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -47.61354064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.357147455215454, + "rewards_train/margins": 2.129206657409668, + "rewards_train/rejected": -3.486354112625122, + "step": 1479 + }, + { + "epoch": 0.41, + "logps_train/chosen": -147.45921325683594, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -159.18875122070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.395921468734741, + "rewards_train/margins": 0.17295360565185547, + "rewards_train/rejected": -3.5688750743865967, + "step": 1479 + }, + { + "epoch": 0.41, + "learning_rate": 7.802434086259468e-07, + "loss": 0.4459, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -94.16350555419922, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -181.87429809570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8163505792617798, + "rewards_train/margins": 3.4710792303085327, + "rewards_train/rejected": -4.2874298095703125, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -25.613643646240234, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -25.979970932006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4176143407821655, + "rewards_train/margins": 0.31163275241851807, + "rewards_train/rejected": -1.7292470932006836, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -96.70376586914062, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -115.43258666992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0703766345977783, + "rewards_train/margins": 1.922882080078125, + "rewards_train/rejected": -2.9932587146759033, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -46.97477340698242, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -63.70989227294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0599772930145264, + "rewards_train/margins": 0.9610118865966797, + "rewards_train/rejected": -3.020989179611206, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -62.12628173828125, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -110.86834716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.362628221511841, + "rewards_train/margins": 1.37420654296875, + "rewards_train/rejected": -3.736834764480591, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -118.73106384277344, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -153.08494567871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5231064558029175, + "rewards_train/margins": 4.135388016700745, + "rewards_train/rejected": -5.658494472503662, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -111.61932373046875, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -173.24188232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3880676329135895, + "rewards_train/margins": 2.9122559130191803, + "rewards_train/rejected": -2.524188280105591, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -88.20035552978516, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -88.24140930175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8700355887413025, + "rewards_train/margins": 0.004105329513549805, + "rewards_train/rejected": -0.8741409182548523, + "step": 1480 + }, + { + "epoch": 0.41, + "logps_train/chosen": -65.39220428466797, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -66.00806427001953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1642204523086548, + "rewards_train/margins": -0.13841402530670166, + "rewards_train/rejected": -1.0258064270019531, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -30.343154907226562, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -21.734025955200195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.93431556224823, + "rewards_train/margins": -0.45778799057006836, + "rewards_train/rejected": -1.4765275716781616, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -191.42233276367188, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -151.07113647460938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.542233467102051, + "rewards_train/margins": -0.13511991500854492, + "rewards_train/rejected": -4.407113552093506, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -30.168212890625, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -39.62983322143555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9793213605880737, + "rewards_train/margins": 0.5899120569229126, + "rewards_train/rejected": -2.5692334175109863, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -11.571560859680176, + "logps_train/ref_chosen": -1.1796875, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -9.998992919921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0391873121261597, + "rewards_train/margins": -1.151788018643856, + "rewards_train/rejected": 0.11260070651769638, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -181.01763916015625, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -256.0, + "logps_train/rejected": -303.97210693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.401763916015625, + "rewards_train/margins": 3.39544677734375, + "rewards_train/rejected": -4.797210693359375, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -24.044967651367188, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -17.495882034301758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8294968008995056, + "rewards_train/margins": 0.07634139060974121, + "rewards_train/rejected": -0.9058381915092468, + "step": 1481 + }, + { + "epoch": 0.41, + "logps_train/chosen": -78.63703155517578, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -50.5, + "logps_train/rejected": -89.73739624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6137031316757202, + "rewards_train/margins": 2.310036540031433, + "rewards_train/rejected": -3.9237396717071533, + "step": 1481 + }, + { + "epoch": 0.41, + "learning_rate": 7.776633001729729e-07, + "loss": 0.447, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -39.427223205566406, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -78.51313018798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9802223443984985, + "rewards_train/margins": 0.3460906744003296, + "rewards_train/rejected": -2.326313018798828, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.677617073059082, + "logps_train/ref_chosen": -1.8828125, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -19.87120819091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8794804811477661, + "rewards_train/margins": 0.5045154094696045, + "rewards_train/rejected": -1.3839958906173706, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -79.48529052734375, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -159.3854217529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.473529100418091, + "rewards_train/margins": 2.2650129795074463, + "rewards_train/rejected": -4.738542079925537, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -14.322357177734375, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -28.725208282470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0494232177734375, + "rewards_train/margins": 1.301222562789917, + "rewards_train/rejected": -2.3506457805633545, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -29.722383499145508, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -48.899044036865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3722383677959442, + "rewards_train/margins": 2.7176661789417267, + "rewards_train/rejected": -3.089904546737671, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -17.775758743286133, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -9.612014770507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8713259100914001, + "rewards_train/margins": -0.4913744330406189, + "rewards_train/rejected": -0.37995147705078125, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -88.47037506103516, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -90.33604431152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0470374822616577, + "rewards_train/margins": 1.936566948890686, + "rewards_train/rejected": -2.9836044311523438, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -166.75950622558594, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -224.68408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.175950527191162, + "rewards_train/margins": 5.842458248138428, + "rewards_train/rejected": -10.01840877532959, + "step": 1482 + }, + { + "epoch": 0.41, + "logps_train/chosen": -21.702848434448242, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -11.040842056274414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8265348672866821, + "rewards_train/margins": -1.1005756258964539, + "rewards_train/rejected": -0.7259592413902283, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -107.33584594726562, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -171.41026306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.433584690093994, + "rewards_train/margins": 4.8574419021606445, + "rewards_train/rejected": -7.291026592254639, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -65.25172424316406, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -164.38023376464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12482757866382599, + "rewards_train/margins": 5.112850859761238, + "rewards_train/rejected": -4.988023281097412, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -160.7340087890625, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -242.4638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.973400890827179, + "rewards_train/margins": 7.072985827922821, + "rewards_train/rejected": -8.04638671875, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -28.592567443847656, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -55.4909782409668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1467567682266235, + "rewards_train/margins": 1.2898410558700562, + "rewards_train/rejected": -2.4365978240966797, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -27.855485916137695, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -27.625, + "logps_train/rejected": -39.57614517211914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6355485916137695, + "rewards_train/margins": 0.5595659017562866, + "rewards_train/rejected": -1.1951144933700562, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -59.341461181640625, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -89.56658172607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30914613604545593, + "rewards_train/margins": 1.1475121080875397, + "rewards_train/rejected": -1.4566582441329956, + "step": 1483 + }, + { + "epoch": 0.41, + "logps_train/chosen": -10.433964729309082, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -32.3050537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5965214967727661, + "rewards_train/margins": 0.8464839458465576, + "rewards_train/rejected": -1.4430054426193237, + "step": 1483 + }, + { + "epoch": 0.41, + "learning_rate": 7.750847478411669e-07, + "loss": 0.3276, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -179.7637176513672, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -234.53492736816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.776371955871582, + "rewards_train/margins": 5.077120780944824, + "rewards_train/rejected": -9.853492736816406, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -28.84596061706543, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -41.29182815551758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.872096061706543, + "rewards_train/margins": -0.30541324615478516, + "rewards_train/rejected": -1.5666828155517578, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -259.3256530761719, + "logps_train/ref_chosen": -235.0, + "logps_train/ref_rejected": -270.0, + "logps_train/rejected": -313.408447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.432565450668335, + "rewards_train/margins": 1.9082791805267334, + "rewards_train/rejected": -4.340844631195068, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -158.7271270751953, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -249.24835205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0727128982543945, + "rewards_train/margins": 3.952122688293457, + "rewards_train/rejected": -8.024835586547852, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -104.12149047851562, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -97.31593322753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.462149053812027, + "rewards_train/margins": 1.369444340467453, + "rewards_train/rejected": -1.83159339427948, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -159.87677001953125, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -142.34042358398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.887676954269409, + "rewards_train/margins": -0.15363454818725586, + "rewards_train/rejected": -3.7340424060821533, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -25.119401931762695, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -12.855818748474121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7119402289390564, + "rewards_train/margins": 0.3064541220664978, + "rewards_train/rejected": -1.0183943510055542, + "step": 1484 + }, + { + "epoch": 0.41, + "logps_train/chosen": -6.8488569259643555, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -1.796875, + "logps_train/rejected": -5.596485137939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06511431187391281, + "rewards_train/margins": 0.4450753256678581, + "rewards_train/rejected": -0.3799610137939453, + "step": 1484 + }, + { + "epoch": 0.42, + "logps_train/chosen": -133.3621063232422, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -168.78848266601562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.986210584640503, + "rewards_train/margins": -0.00736236572265625, + "rewards_train/rejected": -3.9788482189178467, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -102.88348388671875, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -69.53970336914062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7383484244346619, + "rewards_train/margins": -0.00937807559967041, + "rewards_train/rejected": -0.7289703488349915, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -146.36691284179688, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -147.46893310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2366912364959717, + "rewards_train/margins": 0.11020207405090332, + "rewards_train/rejected": -2.346893310546875, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -200.23843383789062, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -276.9844055175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2238433361053467, + "rewards_train/margins": 6.6745970249176025, + "rewards_train/rejected": -9.89844036102295, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -8.744414329528809, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -22.0111141204834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44944143295288086, + "rewards_train/margins": 0.0641700029373169, + "rewards_train/rejected": -0.5136114358901978, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -112.61029052734375, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -72.37200164794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8610290884971619, + "rewards_train/margins": 0.15117114782333374, + "rewards_train/rejected": -1.0122002363204956, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -167.7034149169922, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -222.50823974609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5703415870666504, + "rewards_train/margins": 5.880482196807861, + "rewards_train/rejected": -8.450823783874512, + "step": 1485 + }, + { + "epoch": 0.42, + "logps_train/chosen": -23.031959533691406, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -20.69329071044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9547585248947144, + "rewards_train/margins": -1.4416794180870056, + "rewards_train/rejected": -0.5130791068077087, + "step": 1485 + }, + { + "epoch": 0.42, + "learning_rate": 7.725077696776616e-07, + "loss": 0.5026, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -83.59852600097656, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -74.22283935546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4098527431488037, + "rewards_train/margins": -0.9125688076019287, + "rewards_train/rejected": -2.497283935546875, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -49.53142166137695, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -77.39754486083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1031421422958374, + "rewards_train/margins": 3.7991124391555786, + "rewards_train/rejected": -4.902254581451416, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.8412293791770935, + "logps_train/ref_chosen": -0.365234375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -3.3846333026885986, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04759950190782547, + "rewards_train/margins": -0.09507367387413979, + "rewards_train/rejected": 0.047474171966314316, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -162.6293487548828, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -138.43954467773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.962934970855713, + "rewards_train/margins": -0.2189805507659912, + "rewards_train/rejected": -3.7439544200897217, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -81.42804718017578, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -146.2102508544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9928047060966492, + "rewards_train/margins": 4.278220474720001, + "rewards_train/rejected": -5.27102518081665, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -166.20095825195312, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -196.65237426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.120095729827881, + "rewards_train/margins": 1.4451417922973633, + "rewards_train/rejected": -5.565237522125244, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -81.9241943359375, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -158.95181274414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7674195766448975, + "rewards_train/margins": 4.377761602401733, + "rewards_train/rejected": -7.145181179046631, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -199.97021484375, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -275.94903564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.697021484375, + "rewards_train/margins": 4.797882080078125, + "rewards_train/rejected": -8.494903564453125, + "step": 1486 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.69720458984375, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -37.92165756225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.088470458984375, + "rewards_train/margins": 1.6224453449249268, + "rewards_train/rejected": -2.7109158039093018, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -105.51476287841797, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -142.72178649902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6514763236045837, + "rewards_train/margins": 5.070702612400055, + "rewards_train/rejected": -5.722178936004639, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -5.681868076324463, + "logps_train/ref_chosen": -0.388671875, + "logps_train/ref_rejected": -0.388671875, + "logps_train/rejected": -5.417637825012207, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5293196439743042, + "rewards_train/margins": -0.02642303705215454, + "rewards_train/rejected": -0.5028966069221497, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.9127193093299866, + "logps_train/ref_chosen": -0.177734375, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -13.628746032714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07349849492311478, + "rewards_train/margins": 0.8143761083483696, + "rewards_train/rejected": -0.8878746032714844, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -173.43235778808594, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -171.5390167236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.343235969543457, + "rewards_train/margins": 2.2606658935546875, + "rewards_train/rejected": -6.6039018630981445, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -41.661773681640625, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -52.11870574951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6786774396896362, + "rewards_train/margins": 0.8456932306289673, + "rewards_train/rejected": -2.5243706703186035, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -39.38616943359375, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -65.19393157958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8511170148849487, + "rewards_train/margins": 1.9557760953903198, + "rewards_train/rejected": -3.8068931102752686, + "step": 1487 + }, + { + "epoch": 0.42, + "logps_train/chosen": -99.58051300048828, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -217.89996337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.458051323890686, + "rewards_train/margins": 7.531945586204529, + "rewards_train/rejected": -8.989996910095215, + "step": 1487 + }, + { + "epoch": 0.42, + "learning_rate": 7.699323837185725e-07, + "loss": 0.3073, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -115.41126251220703, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -94.86293029785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.641126275062561, + "rewards_train/margins": -0.7048332467675209, + "rewards_train/rejected": 0.06370697170495987, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -25.535114288330078, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -25.482784271240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3160114288330078, + "rewards_train/margins": 0.9635169506072998, + "rewards_train/rejected": -2.2795283794403076, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -94.7004165649414, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -51.22867965698242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.120041608810425, + "rewards_train/margins": -1.1221736073493958, + "rewards_train/rejected": -0.997868001461029, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.984596252441406, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -33.73783874511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1922096014022827, + "rewards_train/margins": 1.1378244161605835, + "rewards_train/rejected": -2.330034017562866, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -179.04229736328125, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -224.0, + "logps_train/rejected": -313.151611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.504229784011841, + "rewards_train/margins": 5.410931348800659, + "rewards_train/rejected": -8.9151611328125, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -187.44822692871094, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -124.0643081665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4448226690292358, + "rewards_train/margins": 1.3616081476211548, + "rewards_train/rejected": -2.8064308166503906, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -28.2917423248291, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -71.97720336914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9354242086410522, + "rewards_train/margins": 1.362296223640442, + "rewards_train/rejected": -3.297720432281494, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -135.90023803710938, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -129.9920654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4900238513946533, + "rewards_train/margins": 2.55918288230896, + "rewards_train/rejected": -5.049206733703613, + "step": 1488 + }, + { + "epoch": 0.42, + "logps_train/chosen": -228.23294067382812, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -211.57293701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1232941150665283, + "rewards_train/margins": 4.333999872207642, + "rewards_train/rejected": -7.45729398727417, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -161.26943969726562, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -132.4808807373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5269439816474915, + "rewards_train/margins": 2.5211440920829773, + "rewards_train/rejected": -3.0480880737304688, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -56.94116973876953, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -57.46253204345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.144116997718811, + "rewards_train/margins": 0.05213618278503418, + "rewards_train/rejected": -1.1962531805038452, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -32.98393249511719, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -21.92009735107422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2983932495117188, + "rewards_train/margins": -0.5188835263252258, + "rewards_train/rejected": -0.7795097231864929, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -4.568608283996582, + "logps_train/ref_chosen": -0.447265625, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -15.807280540466309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4121342599391937, + "rewards_train/margins": 0.8982813656330109, + "rewards_train/rejected": -1.3104156255722046, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -69.76724243164062, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -110.26151275634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5232757925987244, + "rewards_train/margins": 2.349427044391632, + "rewards_train/rejected": -1.8261512517929077, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -26.247390747070312, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -37.15509796142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4997390508651733, + "rewards_train/margins": 1.3595207929611206, + "rewards_train/rejected": -2.859259843826294, + "step": 1489 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.3243967592716217, + "logps_train/ref_chosen": -0.5234375, + "logps_train/ref_rejected": -0.5234375, + "logps_train/rejected": -0.2967751920223236, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01990407519042492, + "rewards_train/margins": -0.00276215560734272, + "rewards_train/rejected": 0.02266623079776764, + "step": 1489 + }, + { + "epoch": 0.42, + "learning_rate": 7.673586079888697e-07, + "loss": 0.4216, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -25.647790908813477, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -28.091955184936523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2335290908813477, + "rewards_train/margins": 0.2756664752960205, + "rewards_train/rejected": -1.5091955661773682, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -19.21686363220215, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -35.57468795776367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12831364572048187, + "rewards_train/margins": 1.1482825130224228, + "rewards_train/rejected": -1.019968867301941, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -99.3016128540039, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -148.2654571533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7801613211631775, + "rewards_train/margins": 2.0463843941688538, + "rewards_train/rejected": -2.8265457153320312, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -98.54712677001953, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -133.58041381835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6547126770019531, + "rewards_train/margins": 0.20332872867584229, + "rewards_train/rejected": -1.8580414056777954, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -125.68484497070312, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -195.7616424560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8184845447540283, + "rewards_train/margins": 3.357679605484009, + "rewards_train/rejected": -6.176164150238037, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -115.59283447265625, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -127.01566314697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6592835187911987, + "rewards_train/margins": 2.192282795906067, + "rewards_train/rejected": -3.8515663146972656, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -114.19829559326172, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -143.5078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.06982946395874, + "rewards_train/margins": 2.530951976776123, + "rewards_train/rejected": -6.600781440734863, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -64.9405746459961, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -191.75753784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1690574735403061, + "rewards_train/margins": 6.106696501374245, + "rewards_train/rejected": -6.275753974914551, + "step": 1490 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.861960411071777, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -7.606133460998535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6674460768699646, + "rewards_train/margins": -0.3599577248096466, + "rewards_train/rejected": -0.307488352060318, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -131.51052856445312, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -133.57818603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.60105299949646, + "rewards_train/margins": 1.3567657470703125, + "rewards_train/rejected": -3.9578187465667725, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -48.48615646362305, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -49.402042388916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5986156463623047, + "rewards_train/margins": 0.9415886402130127, + "rewards_train/rejected": -2.5402042865753174, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -28.00726318359375, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -43.611846923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.010101318359375, + "rewards_train/margins": 0.15108346939086914, + "rewards_train/rejected": -2.161184787750244, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -12.530027389526367, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -47.98544692993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8920652270317078, + "rewards_train/margins": 2.9252296090126038, + "rewards_train/rejected": -3.8172948360443115, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -143.59555053710938, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -175.7872772216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.759555339813232, + "rewards_train/margins": 1.969172477722168, + "rewards_train/rejected": -6.7287278175354, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -124.81845092773438, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -166.92080688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3818451166152954, + "rewards_train/margins": 1.4102355241775513, + "rewards_train/rejected": -2.7920806407928467, + "step": 1491 + }, + { + "epoch": 0.42, + "logps_train/chosen": -18.05266571044922, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -38.061805725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4865165650844574, + "rewards_train/margins": 1.5696641504764557, + "rewards_train/rejected": -2.056180715560913, + "step": 1491 + }, + { + "epoch": 0.42, + "learning_rate": 7.647864605022554e-07, + "loss": 0.2773, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -92.18891143798828, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -159.96975708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4688911437988281, + "rewards_train/margins": 1.628084659576416, + "rewards_train/rejected": -2.096975803375244, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -27.515419006347656, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -36.0848503112793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6827919483184814, + "rewards_train/margins": 1.3350682258605957, + "rewards_train/rejected": -3.017860174179077, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -38.54079055786133, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -50.91607666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9415791034698486, + "rewards_train/margins": 1.450028657913208, + "rewards_train/rejected": -3.3916077613830566, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -16.265962600708008, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -2.203125, + "logps_train/rejected": -12.073695182800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4640962779521942, + "rewards_train/margins": 0.522960752248764, + "rewards_train/rejected": -0.9870570302009583, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -46.945560455322266, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -50.645355224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6195560693740845, + "rewards_train/margins": 0.919979453086853, + "rewards_train/rejected": -1.5395355224609375, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -38.66126251220703, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -71.36495971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39112624526023865, + "rewards_train/margins": 3.1203698217868805, + "rewards_train/rejected": -3.511496067047119, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -122.49069213867188, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -236.45436096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15093079209327698, + "rewards_train/margins": 6.196367174386978, + "rewards_train/rejected": -6.045436382293701, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -185.60177612304688, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -206.11917114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5601775646209717, + "rewards_train/margins": 0.251739501953125, + "rewards_train/rejected": -2.8119170665740967, + "step": 1492 + }, + { + "epoch": 0.42, + "logps_train/chosen": -72.45349884033203, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -126.15863800048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.595349907875061, + "rewards_train/margins": 2.270513892173767, + "rewards_train/rejected": -2.865863800048828, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -118.93086242675781, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -173.24713134765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8430862426757812, + "rewards_train/margins": 2.881627082824707, + "rewards_train/rejected": -5.724713325500488, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -194.58944702148438, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -303.94146728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.158944845199585, + "rewards_train/margins": 8.43520188331604, + "rewards_train/rejected": -10.594146728515625, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -204.826416015625, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -199.13426208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.982641696929932, + "rewards_train/margins": 0.4307847023010254, + "rewards_train/rejected": -6.413426399230957, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -122.4771499633789, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -147.43313598632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9477150440216064, + "rewards_train/margins": 0.8955986499786377, + "rewards_train/rejected": -2.843313694000244, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -2.009556531906128, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -7.979055881500244, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08341934531927109, + "rewards_train/margins": 0.1688249334692955, + "rewards_train/rejected": -0.08540558815002441, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -96.23704528808594, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -63.059730529785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6237045526504517, + "rewards_train/margins": -0.09273147583007812, + "rewards_train/rejected": -0.5309730768203735, + "step": 1493 + }, + { + "epoch": 0.42, + "logps_train/chosen": -20.52573013305664, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -26.055118560791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6463230848312378, + "rewards_train/margins": 0.2873138189315796, + "rewards_train/rejected": -1.9336369037628174, + "step": 1493 + }, + { + "epoch": 0.42, + "learning_rate": 7.622159592610347e-07, + "loss": 0.3096, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -148.9922332763672, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -150.10418701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007766723865643144, + "rewards_train/margins": 0.1111953720683232, + "rewards_train/rejected": -0.11041869968175888, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -43.55536651611328, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -31.939912796020508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2430366277694702, + "rewards_train/margins": 1.279079794883728, + "rewards_train/rejected": -2.5221164226531982, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -126.72199249267578, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -155.63800048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.172199249267578, + "rewards_train/margins": 2.391601085662842, + "rewards_train/rejected": -6.56380033493042, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -3.795246124267578, + "logps_train/ref_chosen": -1.359375, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -15.061687469482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24358712136745453, + "rewards_train/margins": 1.0266441255807877, + "rewards_train/rejected": -1.2702312469482422, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -9.589288711547852, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -1.1171875, + "logps_train/rejected": -1.1044416427612305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1714288741350174, + "rewards_train/margins": -0.17270345985889435, + "rewards_train/rejected": 0.0012745857238769531, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -159.18167114257812, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -186.6063690185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9181671142578125, + "rewards_train/margins": 3.142469882965088, + "rewards_train/rejected": -6.0606369972229, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -23.815399169921875, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -28.771541595458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1752899885177612, + "rewards_train/margins": 0.1456141471862793, + "rewards_train/rejected": -1.3209041357040405, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -104.92048645019531, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -168.8973388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9420487880706787, + "rewards_train/margins": 3.5976850986480713, + "rewards_train/rejected": -6.53973388671875, + "step": 1494 + }, + { + "epoch": 0.42, + "logps_train/chosen": -15.35344409942627, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -20.266891479492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07715559005737305, + "rewards_train/margins": 0.39134475588798523, + "rewards_train/rejected": -0.3141891658306122, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -23.19216537475586, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -52.47051239013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16921654343605042, + "rewards_train/margins": 0.1528347134590149, + "rewards_train/rejected": -0.3220512568950653, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -15.624250411987305, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -22.300601959228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6374250650405884, + "rewards_train/margins": 0.2738851308822632, + "rewards_train/rejected": -0.9113101959228516, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -27.714664459228516, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -23.335535049438477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0035335540305823088, + "rewards_train/margins": 2.007399511290714, + "rewards_train/rejected": -2.003865957260132, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -41.34196472167969, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -122.65675354003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11580353230237961, + "rewards_train/margins": 1.0814788863062859, + "rewards_train/rejected": -0.9656753540039062, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -84.06588745117188, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -120.64534759521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24341125786304474, + "rewards_train/margins": 1.907946065068245, + "rewards_train/rejected": -1.6645348072052002, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -228.92697143554688, + "logps_train/ref_chosen": -218.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -226.6064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0926971435546875, + "rewards_train/margins": 4.167947292327881, + "rewards_train/rejected": -5.260644435882568, + "step": 1495 + }, + { + "epoch": 0.42, + "logps_train/chosen": -32.286705017089844, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -37.1873779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9911705255508423, + "rewards_train/margins": 1.858817219734192, + "rewards_train/rejected": -2.849987745285034, + "step": 1495 + }, + { + "epoch": 0.42, + "learning_rate": 7.596471222559906e-07, + "loss": 0.3233, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -143.8245391845703, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -276.71917724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.332453966140747, + "rewards_train/margins": 10.239463567733765, + "rewards_train/rejected": -12.571917533874512, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -97.60991668701172, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -128.57659912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.960991621017456, + "rewards_train/margins": 1.046668291091919, + "rewards_train/rejected": -4.007659912109375, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -66.37214660644531, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -97.54768371582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4627853333950043, + "rewards_train/margins": 0.5175537057220936, + "rewards_train/rejected": -0.05476837232708931, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -37.31352615356445, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -46.793365478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.800102710723877, + "rewards_train/margins": 0.6104838848114014, + "rewards_train/rejected": -3.4105865955352783, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.3478020131587982, + "logps_train/ref_chosen": -0.263671875, + "logps_train/ref_rejected": -0.263671875, + "logps_train/rejected": -0.34989216923713684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008413014002144337, + "rewards_train/margins": 0.00020901579409837723, + "rewards_train/rejected": -0.008622029796242714, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -144.6029815673828, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -219.0, + "logps_train/rejected": -298.2260437011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7602981925010681, + "rewards_train/margins": 7.162306368350983, + "rewards_train/rejected": -7.922604560852051, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -253.53228759765625, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -258.5600280761719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.553229331970215, + "rewards_train/margins": -0.39722633361816406, + "rewards_train/rejected": -10.15600299835205, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -205.80026245117188, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -150.94447326660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.680026531219482, + "rewards_train/margins": -2.185579299926758, + "rewards_train/rejected": -4.494447231292725, + "step": 1496 + }, + { + "epoch": 0.42, + "logps_train/chosen": -44.07723617553711, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -170.13426208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.545223593711853, + "rewards_train/margins": 2.2682026624679565, + "rewards_train/rejected": -3.8134262561798096, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.905521392822266, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -63.606719970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9733647108078003, + "rewards_train/margins": 1.4248074293136597, + "rewards_train/rejected": -3.39817214012146, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -20.429094314575195, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -38.784568786621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6616594791412354, + "rewards_train/margins": -1.133202612400055, + "rewards_train/rejected": -0.5284568667411804, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -30.539905548095703, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -37.934112548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7352405786514282, + "rewards_train/margins": 0.9019206762313843, + "rewards_train/rejected": -2.6371612548828125, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -13.77937126159668, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -12.972684860229492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8373121619224548, + "rewards_train/margins": -0.12754368782043457, + "rewards_train/rejected": -0.7097684741020203, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.313164710998535, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -44.18635559082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7828789949417114, + "rewards_train/margins": 1.5607565641403198, + "rewards_train/rejected": -2.3436355590820312, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -154.73043823242188, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -155.13546752929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0230438709259033, + "rewards_train/margins": 3.140503168106079, + "rewards_train/rejected": -6.163547039031982, + "step": 1497 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.715839385986328, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -28.505508422851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7747089862823486, + "rewards_train/margins": 0.7320919036865234, + "rewards_train/rejected": -2.506800889968872, + "step": 1497 + }, + { + "epoch": 0.42, + "learning_rate": 7.570799674662585e-07, + "loss": 0.5349, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -16.6987361907959, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -13.58428955078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.326123595237732, + "rewards_train/margins": -0.35831964015960693, + "rewards_train/rejected": -0.967803955078125, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -14.507719993591309, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -12.289800643920898, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1476470232009888, + "rewards_train/margins": -0.12335443496704102, + "rewards_train/rejected": -1.0242925882339478, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -83.77808380126953, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -58.4517822265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.927808403968811, + "rewards_train/margins": -0.03263014554977417, + "rewards_train/rejected": -0.8951782584190369, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -25.795427322387695, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -31.74673843383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4920427799224854, + "rewards_train/margins": 0.5826311111450195, + "rewards_train/rejected": -2.074673891067505, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -9.768507957458496, + "logps_train/ref_chosen": -0.234375, + "logps_train/ref_rejected": -0.234375, + "logps_train/rejected": -10.144067764282227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9534133076667786, + "rewards_train/margins": 0.037555992603302, + "rewards_train/rejected": -0.9909693002700806, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -120.4543228149414, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -194.24595642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2454323768615723, + "rewards_train/margins": 4.72916316986084, + "rewards_train/rejected": -7.974595546722412, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -24.725744247436523, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -13.287588119506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27257442474365234, + "rewards_train/margins": 0.31555938720703125, + "rewards_train/rejected": -0.5881338119506836, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -46.24847412109375, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -31.588403701782227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9873474836349487, + "rewards_train/margins": -1.3660070896148682, + "rewards_train/rejected": -0.6213403940200806, + "step": 1498 + }, + { + "epoch": 0.42, + "logps_train/chosen": -108.25654602050781, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -184.66725158691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9756546020507812, + "rewards_train/margins": 3.7910704612731934, + "rewards_train/rejected": -6.766725063323975, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -139.4468231201172, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -173.5590362548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.344682216644287, + "rewards_train/margins": 0.1112213134765625, + "rewards_train/rejected": -4.45590353012085, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -31.766769409179688, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -24.073076248168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0266770124435425, + "rewards_train/margins": 0.5243806838989258, + "rewards_train/rejected": -1.5510576963424683, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -171.21829223632812, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -169.95010375976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7218291759490967, + "rewards_train/margins": 2.6731812953948975, + "rewards_train/rejected": -5.395010471343994, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -25.472858428955078, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -28.779212951660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6035358905792236, + "rewards_train/margins": -0.800614595413208, + "rewards_train/rejected": -0.8029212951660156, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -74.63236236572266, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -74.6585693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2132363319396973, + "rewards_train/margins": 0.30262064933776855, + "rewards_train/rejected": -2.515856981277466, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -15.406457901000977, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -2.59375, + "logps_train/rejected": -9.516094207763672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2890833616256714, + "rewards_train/margins": -0.5968489050865173, + "rewards_train/rejected": -0.692234456539154, + "step": 1499 + }, + { + "epoch": 0.42, + "logps_train/chosen": -66.68695831298828, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -53.58573913574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0686957836151123, + "rewards_train/margins": 0.6398782730102539, + "rewards_train/rejected": -2.708574056625366, + "step": 1499 + }, + { + "epoch": 0.42, + "learning_rate": 7.545145128592008e-07, + "loss": 0.6249, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -17.116975784301758, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -11.06418228149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14294758439064026, + "rewards_train/margins": 0.10097064077854156, + "rewards_train/rejected": -0.24391822516918182, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -120.43875122070312, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -224.12603759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.99387526512146, + "rewards_train/margins": 5.218728303909302, + "rewards_train/rejected": -9.212603569030762, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -146.97262573242188, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -175.7010955810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3972625732421875, + "rewards_train/margins": 1.0728468894958496, + "rewards_train/rejected": -4.470109462738037, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -152.01495361328125, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -148.08807373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.901495337486267, + "rewards_train/margins": 1.3573120832443237, + "rewards_train/rejected": -3.258807420730591, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -154.9072265625, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -198.45864868164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.190722703933716, + "rewards_train/margins": 2.9551422595977783, + "rewards_train/rejected": -5.145864963531494, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.23490533232688904, + "logps_train/ref_chosen": -0.392578125, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -17.204315185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.015767280012369156, + "rewards_train/margins": 0.21744880452752113, + "rewards_train/rejected": -0.20168152451515198, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -17.68587303161621, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -19.3151798248291, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4185872972011566, + "rewards_train/margins": 0.8473056852817535, + "rewards_train/rejected": -1.2658929824829102, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -72.75982666015625, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -59.0, + "logps_train/rejected": -71.77951049804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.675982713699341, + "rewards_train/margins": -1.3980315923690796, + "rewards_train/rejected": -1.2779511213302612, + "step": 1500 + }, + { + "epoch": 0.42, + "logps_train/chosen": -156.46438598632812, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -225.3679962158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.096438884735107, + "rewards_train/margins": 3.1403608322143555, + "rewards_train/rejected": -7.236799716949463, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -161.71099853515625, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -225.42337036132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.221099853515625, + "rewards_train/margins": 5.621237754821777, + "rewards_train/rejected": -10.842337608337402, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -21.895172119140625, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -4.129188537597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.35201722383499146, + "rewards_train/margins": -0.37972337007522583, + "rewards_train/rejected": 0.027706146240234375, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -112.02774047851562, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -132.5476531982422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.352774143218994, + "rewards_train/margins": -0.29800868034362793, + "rewards_train/rejected": -2.054765462875366, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -187.68260192871094, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -186.1236114501953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4682602882385254, + "rewards_train/margins": -0.25589919090270996, + "rewards_train/rejected": -3.2123610973358154, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -52.18388748168945, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -36.204063415527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5433887839317322, + "rewards_train/margins": 1.9082675576210022, + "rewards_train/rejected": -2.4516563415527344, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -107.013671875, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -151.24649047851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.201367139816284, + "rewards_train/margins": 3.82328200340271, + "rewards_train/rejected": -6.024649143218994, + "step": 1501 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.917802810668945, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -24.06104278564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7449052929878235, + "rewards_train/margins": 0.7674489617347717, + "rewards_train/rejected": -1.5123542547225952, + "step": 1501 + }, + { + "epoch": 0.42, + "learning_rate": 7.519507763902792e-07, + "loss": 0.435, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -15.08102035522461, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -5.418683052062988, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.134664535522461, + "rewards_train/margins": -0.8943587243556976, + "rewards_train/rejected": -0.2403058111667633, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -257.7908935546875, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -212.88629150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.979089260101318, + "rewards_train/margins": 0.009540081024169922, + "rewards_train/rejected": -6.988629341125488, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -1.4514409303665161, + "logps_train/ref_chosen": -0.80078125, + "logps_train/ref_rejected": -0.67578125, + "logps_train/rejected": -0.32058286666870117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06506597250699997, + "rewards_train/margins": -0.10058581084012985, + "rewards_train/rejected": 0.03551983833312988, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -150.22308349609375, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -140.50674438476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.722308397293091, + "rewards_train/margins": -0.4216339588165283, + "rewards_train/rejected": -2.3006744384765625, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -155.34515380859375, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -179.0183563232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.934515476226807, + "rewards_train/margins": 0.6173200607299805, + "rewards_train/rejected": -5.551835536956787, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -29.608055114746094, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -45.11433410644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.014194488525390625, + "rewards_train/margins": 3.4256279468536377, + "rewards_train/rejected": -3.411433458328247, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -13.226383209228516, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -28.003780364990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5632633566856384, + "rewards_train/margins": 0.3746147155761719, + "rewards_train/rejected": -0.9378780722618103, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -8.367642402648926, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -35.87158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04448575899004936, + "rewards_train/margins": 0.38164396211504936, + "rewards_train/rejected": -0.337158203125, + "step": 1502 + }, + { + "epoch": 0.42, + "logps_train/chosen": -160.39413452148438, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -207.14212036132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8394135236740112, + "rewards_train/margins": 1.174798607826233, + "rewards_train/rejected": -3.014212131500244, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -61.58517837524414, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -57.12102508544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.008517861366272, + "rewards_train/margins": 1.35358464717865, + "rewards_train/rejected": -2.362102508544922, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -83.60401916503906, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -134.98423767089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5104019045829773, + "rewards_train/margins": 5.438021957874298, + "rewards_train/rejected": -5.948423862457275, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -8.007233619689941, + "logps_train/ref_chosen": -0.053955078125, + "logps_train/ref_rejected": -0.053955078125, + "logps_train/rejected": -7.75746488571167, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7953278422355652, + "rewards_train/margins": -0.02497684955596924, + "rewards_train/rejected": -0.770350992679596, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -123.33246612548828, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -149.99169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4832466840744019, + "rewards_train/margins": 0.015923261642456055, + "rewards_train/rejected": -1.499169945716858, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -114.27249908447266, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -182.80352783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2772499322891235, + "rewards_train/margins": 6.15310275554657, + "rewards_train/rejected": -7.430352687835693, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -47.282493591308594, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -28.348114013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4282493591308594, + "rewards_train/margins": 0.43781208992004395, + "rewards_train/rejected": -1.8660614490509033, + "step": 1503 + }, + { + "epoch": 0.42, + "logps_train/chosen": -17.827829360961914, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -38.60607147216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0327829122543335, + "rewards_train/margins": 1.4965742826461792, + "rewards_train/rejected": -2.5293571949005127, + "step": 1503 + }, + { + "epoch": 0.42, + "learning_rate": 7.493887760029312e-07, + "loss": 0.4815, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -60.55169677734375, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -99.85523986816406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0551698207855225, + "rewards_train/margins": -1.8696458339691162, + "rewards_train/rejected": -0.18552398681640625, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -54.17303466796875, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -76.08075714111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2923035621643066, + "rewards_train/margins": 0.8657722473144531, + "rewards_train/rejected": -3.1580758094787598, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -35.351619720458984, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -15.49140739440918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5226620435714722, + "rewards_train/margins": -0.5203962326049805, + "rewards_train/rejected": -1.0022658109664917, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -13.91672134399414, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -20.64295196533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26667213439941406, + "rewards_train/margins": 1.599185585975647, + "rewards_train/rejected": -1.865857720375061, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -220.35003662109375, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -193.3101806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.535003662109375, + "rewards_train/margins": 0.7960143089294434, + "rewards_train/rejected": -6.331017971038818, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -21.771085739135742, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -39.53794860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5271086096763611, + "rewards_train/margins": 1.3516862988471985, + "rewards_train/rejected": -1.8787949085235596, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -30.305620193481445, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -21.95125961303711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3524370193481445, + "rewards_train/margins": -0.5010610818862915, + "rewards_train/rejected": -1.851375937461853, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -121.75041198730469, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -215.39743041992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9250411987304688, + "rewards_train/margins": 5.014701843261719, + "rewards_train/rejected": -5.9397430419921875, + "step": 1504 + }, + { + "epoch": 0.42, + "logps_train/chosen": -31.154348373413086, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -69.57345581054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8091849088668823, + "rewards_train/margins": 0.9231606721878052, + "rewards_train/rejected": -2.7323455810546875, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.888179779052734, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -11.644429206848145, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5950679779052734, + "rewards_train/margins": -0.08375006914138794, + "rewards_train/rejected": -0.5113179087638855, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -102.6855239868164, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -112.2049789428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9685524106025696, + "rewards_train/margins": 0.2019454836845398, + "rewards_train/rejected": -1.1704978942871094, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -96.67205047607422, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -105.27569580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.642205238342285, + "rewards_train/margins": 0.23536443710327148, + "rewards_train/rejected": -4.877569675445557, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -137.1414794921875, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -128.75570678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9641480445861816, + "rewards_train/margins": 0.11142277717590332, + "rewards_train/rejected": -3.075570821762085, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -88.27902221679688, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -95.9727783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02790222130715847, + "rewards_train/margins": 0.21937561966478825, + "rewards_train/rejected": -0.24727784097194672, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -7.344447612762451, + "logps_train/ref_chosen": -1.1015625, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -18.565486907958984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6242884993553162, + "rewards_train/margins": -0.20523980259895325, + "rewards_train/rejected": -0.4190486967563629, + "step": 1505 + }, + { + "epoch": 0.42, + "logps_train/chosen": -107.0277099609375, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -218.46261596679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05277099832892418, + "rewards_train/margins": 6.293490502983332, + "rewards_train/rejected": -6.346261501312256, + "step": 1505 + }, + { + "epoch": 0.42, + "learning_rate": 7.468285296284425e-07, + "loss": 0.5877, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.6582490801811218, + "logps_train/ref_chosen": -0.37890625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -9.408024787902832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.027934283018112183, + "rewards_train/margins": 0.37849318981170654, + "rewards_train/rejected": -0.4064274728298187, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -133.05535888671875, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -280.4090576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9055359363555908, + "rewards_train/margins": 9.035369634628296, + "rewards_train/rejected": -10.940905570983887, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -92.26082611083984, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -189.30908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2260825634002686, + "rewards_train/margins": 2.7048256397247314, + "rewards_train/rejected": -4.930908203125, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -45.53215026855469, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -90.82748413085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9844651222229004, + "rewards_train/margins": 1.9482831954956055, + "rewards_train/rejected": -4.932748317718506, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.474117279052734, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -37.8848876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2099117040634155, + "rewards_train/margins": 1.3598271608352661, + "rewards_train/rejected": -2.5697388648986816, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -34.14716339111328, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -33.11757278442383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2397162914276123, + "rewards_train/margins": 0.7157909870147705, + "rewards_train/rejected": -2.955507278442383, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -29.41376495361328, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -31.3780574798584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2601265907287598, + "rewards_train/margins": 0.14642930030822754, + "rewards_train/rejected": -2.4065558910369873, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -100.47486114501953, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -92.05465698242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5474860668182373, + "rewards_train/margins": -0.8920203447341919, + "rewards_train/rejected": -1.6554657220840454, + "step": 1506 + }, + { + "epoch": 0.42, + "logps_train/chosen": -8.788016319274902, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -1.90625, + "logps_train/rejected": -14.199392318725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.053801633417606354, + "rewards_train/margins": 1.1755125746130943, + "rewards_train/rejected": -1.2293142080307007, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -108.63533020019531, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -226.19674682617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4635331630706787, + "rewards_train/margins": 2.8561418056488037, + "rewards_train/rejected": -5.319674968719482, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -146.579345703125, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -203.80963134765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1079347133636475, + "rewards_train/margins": 3.273028612136841, + "rewards_train/rejected": -6.380963325500488, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -102.41919708251953, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -73.78604125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.94191974401474, + "rewards_train/margins": 0.6366843581199646, + "rewards_train/rejected": -1.5786041021347046, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -43.42698669433594, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -31.310949325561523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0676987171173096, + "rewards_train/margins": 1.0633962154388428, + "rewards_train/rejected": -2.1310949325561523, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -35.454742431640625, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -39.91804504394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0204741954803467, + "rewards_train/margins": 0.7900803089141846, + "rewards_train/rejected": -2.8105545043945312, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.9624624252319336, + "logps_train/ref_chosen": -1.6796875, + "logps_train/ref_rejected": -1.140625, + "logps_train/rejected": -16.70730209350586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07172250747680664, + "rewards_train/margins": 1.6283901929855347, + "rewards_train/rejected": -1.556667685508728, + "step": 1507 + }, + { + "epoch": 0.42, + "logps_train/chosen": -95.83485412597656, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -139.46304321289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7834854125976562, + "rewards_train/margins": 2.1628189086914062, + "rewards_train/rejected": -4.9463043212890625, + "step": 1507 + }, + { + "epoch": 0.42, + "learning_rate": 7.442700551858236e-07, + "loss": 0.3093, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -166.79190063476562, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -236.57359313964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.629189968109131, + "rewards_train/margins": 4.728169918060303, + "rewards_train/rejected": -11.357359886169434, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -79.12487030029297, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -98.17599487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.362487316131592, + "rewards_train/margins": 1.055112361907959, + "rewards_train/rejected": -5.417599678039551, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -190.34121704101562, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -163.15646362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.134121894836426, + "rewards_train/margins": 0.23152446746826172, + "rewards_train/rejected": -5.3656463623046875, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -125.4151382446289, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -283.57550048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4415138959884644, + "rewards_train/margins": 10.616036534309387, + "rewards_train/rejected": -12.057550430297852, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.198511123657227, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -21.890316009521484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3573511242866516, + "rewards_train/margins": -0.1808195263147354, + "rewards_train/rejected": -0.1765315979719162, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -121.9248046875, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -208.6015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7424805164337158, + "rewards_train/margins": 2.7176759243011475, + "rewards_train/rejected": -4.460156440734863, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -148.11228942871094, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -132.7807159423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3112289905548096, + "rewards_train/margins": 3.36684250831604, + "rewards_train/rejected": -4.67807149887085, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -77.3646011352539, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -174.06671142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3614602088928223, + "rewards_train/margins": 6.945210933685303, + "rewards_train/rejected": -9.306671142578125, + "step": 1508 + }, + { + "epoch": 0.42, + "logps_train/chosen": -12.825177192687988, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -40.91409683227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8137677311897278, + "rewards_train/margins": 1.7463919520378113, + "rewards_train/rejected": -2.560159683227539, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -124.81727600097656, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -141.676025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.131727695465088, + "rewards_train/margins": 3.035874843597412, + "rewards_train/rejected": -6.1676025390625, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -1.1908538341522217, + "logps_train/ref_chosen": -0.9609375, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -8.592381477355957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.022991633042693138, + "rewards_train/margins": 0.4362465087324381, + "rewards_train/rejected": -0.4592381417751312, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -180.18948364257812, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -162.02957153320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.268948554992676, + "rewards_train/margins": -0.6659913063049316, + "rewards_train/rejected": -5.602957248687744, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -151.97970581054688, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -203.79922485351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2979706525802612, + "rewards_train/margins": 3.181952118873596, + "rewards_train/rejected": -4.479922771453857, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -60.999961853027344, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -61.682735443115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6500038504600525, + "rewards_train/margins": 0.16827738285064697, + "rewards_train/rejected": 0.4817264676094055, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -159.5389404296875, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -177.39614868164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.253894329071045, + "rewards_train/margins": 0.6857204437255859, + "rewards_train/rejected": -6.939614772796631, + "step": 1509 + }, + { + "epoch": 0.42, + "logps_train/chosen": -106.05392456054688, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -130.30059814453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5553925037384033, + "rewards_train/margins": -0.2253326177597046, + "rewards_train/rejected": -1.3300598859786987, + "step": 1509 + }, + { + "epoch": 0.42, + "learning_rate": 7.417133705816836e-07, + "loss": 0.3399, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -7.176968574523926, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -27.877731323242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35519686341285706, + "rewards_train/margins": 1.4325762689113617, + "rewards_train/rejected": -1.7877731323242188, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -112.38184356689453, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -133.22299194335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.73818439245224, + "rewards_train/margins": 1.0841148495674133, + "rewards_train/rejected": -1.8222992420196533, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -0.6155761480331421, + "logps_train/ref_chosen": -0.25, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -24.42181968688965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03655761480331421, + "rewards_train/margins": 0.4931243658065796, + "rewards_train/rejected": -0.5296819806098938, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -106.7625732421875, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -222.80026245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12625733017921448, + "rewards_train/margins": 11.753769487142563, + "rewards_train/rejected": -11.880026817321777, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -130.36740112304688, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -163.46279907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.586740016937256, + "rewards_train/margins": 3.25954008102417, + "rewards_train/rejected": -7.846280097961426, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -227.4351348876953, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -196.00048828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.343513488769531, + "rewards_train/margins": -0.04346466064453125, + "rewards_train/rejected": -6.300048828125, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -146.82406616210938, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -168.36228942871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.632406711578369, + "rewards_train/margins": 2.5038223266601562, + "rewards_train/rejected": -6.136229038238525, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -165.69638061523438, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -230.64633178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9696381092071533, + "rewards_train/margins": 4.494995355606079, + "rewards_train/rejected": -7.464633464813232, + "step": 1510 + }, + { + "epoch": 0.42, + "logps_train/chosen": -152.85958862304688, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -196.81329345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7859588861465454, + "rewards_train/margins": 4.695370554924011, + "rewards_train/rejected": -6.481329441070557, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -12.296908378601074, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -9.385793685913086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3234408497810364, + "rewards_train/margins": -0.04423648118972778, + "rewards_train/rejected": -0.2792043685913086, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.752216339111328, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -22.682008743286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7892841696739197, + "rewards_train/margins": 0.4414166808128357, + "rewards_train/rejected": -1.2307008504867554, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.41489601135254, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -6.941606044769287, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2085103988647461, + "rewards_train/margins": 0.5245459973812103, + "rewards_train/rejected": -0.31603559851646423, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -30.529972076416016, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -85.87294006347656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7529972791671753, + "rewards_train/margins": -1.1657032370567322, + "rewards_train/rejected": -0.5872940421104431, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -110.94830322265625, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -176.451416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1948304176330566, + "rewards_train/margins": 4.250311374664307, + "rewards_train/rejected": -7.445141792297363, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -104.51191711425781, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -177.84762573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.551191806793213, + "rewards_train/margins": 2.133570671081543, + "rewards_train/rejected": -4.684762477874756, + "step": 1511 + }, + { + "epoch": 0.42, + "logps_train/chosen": -17.458633422851562, + "logps_train/ref_chosen": -1.1328125, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -41.96931838989258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6325820684432983, + "rewards_train/margins": 1.1205998659133911, + "rewards_train/rejected": -2.7531819343566895, + "step": 1511 + }, + { + "epoch": 0.42, + "learning_rate": 7.391584937101033e-07, + "loss": 0.3347, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -48.54761505126953, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -35.7054328918457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.592261552810669, + "rewards_train/margins": -0.31546831130981445, + "rewards_train/rejected": -2.2767932415008545, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -125.04778289794922, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -156.02493286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5547783374786377, + "rewards_train/margins": 0.5477149486541748, + "rewards_train/rejected": -3.1024932861328125, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.048397064208984, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -28.23385238647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0071834325790405, + "rewards_train/margins": 0.6787018775939941, + "rewards_train/rejected": -1.6858853101730347, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -5.112214088439941, + "logps_train/ref_chosen": -1.0390625, + "logps_train/ref_rejected": -1.40625, + "logps_train/rejected": -6.291675567626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4073151648044586, + "rewards_train/margins": 0.0812273919582367, + "rewards_train/rejected": -0.4885425567626953, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -32.17312240600586, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -44.10238265991211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.161062240600586, + "rewards_train/margins": -0.3633239269256592, + "rewards_train/rejected": -1.7977383136749268, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -164.75750732421875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -183.08642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.875750780105591, + "rewards_train/margins": 3.132891893386841, + "rewards_train/rejected": -7.008642673492432, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -4.254149913787842, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -1.0, + "logps_train/rejected": -0.9136976003646851, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2433837503194809, + "rewards_train/margins": -0.2520139906555414, + "rewards_train/rejected": 0.008630240336060524, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -24.99629783630371, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -26.082523345947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.184004783630371, + "rewards_train/margins": -0.03825235366821289, + "rewards_train/rejected": -2.145752429962158, + "step": 1512 + }, + { + "epoch": 0.42, + "logps_train/chosen": -210.48159790039062, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -224.65980529785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.048160076141357, + "rewards_train/margins": 2.1178207397460938, + "rewards_train/rejected": -7.165980815887451, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -201.80648803710938, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -266.8274841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.780648708343506, + "rewards_train/margins": 2.7021002769470215, + "rewards_train/rejected": -8.482748985290527, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -13.75885009765625, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -57.16548538208008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0118225812911987, + "rewards_train/margins": 0.4797259569168091, + "rewards_train/rejected": -1.4915485382080078, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -222.6171875, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -59.56185531616211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.961719036102295, + "rewards_train/margins": -3.080533504486084, + "rewards_train/rejected": -1.881185531616211, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -132.8120880126953, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -140.50843811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.781208872795105, + "rewards_train/margins": 0.46963489055633545, + "rewards_train/rejected": -2.2508437633514404, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -67.63615417480469, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -200.29843139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4886153936386108, + "rewards_train/margins": 5.54122793674469, + "rewards_train/rejected": -7.029843330383301, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -39.22870635986328, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -44.25123977661133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8478707075119019, + "rewards_train/margins": 1.1272534132003784, + "rewards_train/rejected": -2.9751241207122803, + "step": 1513 + }, + { + "epoch": 0.42, + "logps_train/chosen": -114.66020202636719, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -212.4119873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0660202503204346, + "rewards_train/margins": 5.975178480148315, + "rewards_train/rejected": -7.04119873046875, + "step": 1513 + }, + { + "epoch": 0.42, + "learning_rate": 7.366054424525119e-07, + "loss": 0.5896, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -36.42668914794922, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -34.11531448364258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6051689386367798, + "rewards_train/margins": 0.4188624620437622, + "rewards_train/rejected": -2.024031400680542, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -30.07833480834961, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -32.288673400878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.335958480834961, + "rewards_train/margins": 0.09290885925292969, + "rewards_train/rejected": -2.4288673400878906, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -3.7588415145874023, + "logps_train/ref_chosen": -1.78125, + "logps_train/ref_rejected": -1.4375, + "logps_train/rejected": -2.3763270378112793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19775915145874023, + "rewards_train/margins": -0.10387644916772842, + "rewards_train/rejected": -0.09388270229101181, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -18.703508377075195, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -27.574270248413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1047258377075195, + "rewards_train/margins": 0.7527011632919312, + "rewards_train/rejected": -1.8574270009994507, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -43.00876998901367, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -45.99168395996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.238377094268799, + "rewards_train/margins": 0.27329134941101074, + "rewards_train/rejected": -2.5116684436798096, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -31.23080062866211, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -60.31595993041992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3105801343917847, + "rewards_train/margins": 2.133515954017639, + "rewards_train/rejected": -3.444096088409424, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -169.08570861816406, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -304.0, + "logps_train/rejected": -352.992919921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.958570957183838, + "rewards_train/margins": -0.05927896499633789, + "rewards_train/rejected": -4.8992919921875, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -104.20018005371094, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -150.328369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6200180053710938, + "rewards_train/margins": 1.812819004058838, + "rewards_train/rejected": -4.432837009429932, + "step": 1514 + }, + { + "epoch": 0.42, + "logps_train/chosen": -26.466758728027344, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -45.726051330566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5404258966445923, + "rewards_train/margins": 1.6384292840957642, + "rewards_train/rejected": -3.1788551807403564, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -100.80030059814453, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -29.966182708740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1300300657749176, + "rewards_train/margins": 1.829088181257248, + "rewards_train/rejected": -1.9591182470321655, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -5.247568607330322, + "logps_train/ref_chosen": -0.91015625, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -17.88498306274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4337412416934967, + "rewards_train/margins": 0.4672570526599884, + "rewards_train/rejected": -0.9009982943534851, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.304023742675781, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -28.104564666748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.508527398109436, + "rewards_train/margins": 1.2519290447235107, + "rewards_train/rejected": -1.7604564428329468, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.547151565551758, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -13.493084907531738, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36096516251564026, + "rewards_train/margins": 0.2508433163166046, + "rewards_train/rejected": -0.6118084788322449, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -16.23760414123535, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -37.09322738647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3471978902816772, + "rewards_train/margins": 1.1496249437332153, + "rewards_train/rejected": -2.4968228340148926, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -29.709644317626953, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -36.25996780395508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5959644317626953, + "rewards_train/margins": 2.5456573963165283, + "rewards_train/rejected": -3.1416218280792236, + "step": 1515 + }, + { + "epoch": 0.42, + "logps_train/chosen": -114.15538787841797, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -230.6738739013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.465538740158081, + "rewards_train/margins": 5.401849031448364, + "rewards_train/rejected": -8.867387771606445, + "step": 1515 + }, + { + "epoch": 0.42, + "learning_rate": 7.340542346775618e-07, + "loss": 0.3631, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -86.30250549316406, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -26.87075424194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2802505493164062, + "rewards_train/margins": 0.2880748510360718, + "rewards_train/rejected": -1.568325400352478, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -50.22916030883789, + "logps_train/ref_chosen": -29.125, + "logps_train/ref_rejected": -58.5, + "logps_train/rejected": -94.44920349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1104161739349365, + "rewards_train/margins": 1.484504222869873, + "rewards_train/rejected": -3.5949203968048096, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -23.093748092651367, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -48.73645782470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6218748092651367, + "rewards_train/margins": 0.7392709255218506, + "rewards_train/rejected": -2.3611457347869873, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -21.523462295532227, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -38.40699768066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9414087533950806, + "rewards_train/margins": 0.7680410146713257, + "rewards_train/rejected": -2.7094497680664062, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -118.6874771118164, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -197.47900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8187477588653564, + "rewards_train/margins": 5.0291526317596436, + "rewards_train/rejected": -6.847900390625, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -106.28422546386719, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -109.62222290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.928422689437866, + "rewards_train/margins": 0.08379960060119629, + "rewards_train/rejected": -4.0122222900390625, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -83.66253662109375, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -82.14309692382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4662536680698395, + "rewards_train/margins": -0.0019439756870269775, + "rewards_train/rejected": -0.4643096923828125, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.186809539794922, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -41.66197967529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.768680989742279, + "rewards_train/margins": 1.7100170254707336, + "rewards_train/rejected": -2.4786980152130127, + "step": 1516 + }, + { + "epoch": 0.42, + "logps_train/chosen": -16.507854461669922, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -17.148962020874023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5132854580879211, + "rewards_train/margins": 0.30786073207855225, + "rewards_train/rejected": -0.8211461901664734, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -100.90925598144531, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -130.3746337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0409256219863892, + "rewards_train/margins": 2.1965378522872925, + "rewards_train/rejected": -3.2374634742736816, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -31.42162322998047, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -28.81957244873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9671623110771179, + "rewards_train/margins": -0.210205078125, + "rewards_train/rejected": -0.7569572329521179, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -14.328999519348145, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -27.404766082763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9985249638557434, + "rewards_train/margins": 0.6669517159461975, + "rewards_train/rejected": -1.665476679801941, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -96.53658294677734, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -96.45137786865234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2536582946777344, + "rewards_train/margins": -0.008520498871803284, + "rewards_train/rejected": -0.2451377958059311, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -11.440387725830078, + "logps_train/ref_chosen": -1.703125, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -55.743412017822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9737262725830078, + "rewards_train/margins": 0.8256149291992188, + "rewards_train/rejected": -1.7993412017822266, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -181.77777099609375, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -122.41348266601562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.57777738571167, + "rewards_train/margins": -2.0364291667938232, + "rewards_train/rejected": -2.5413482189178467, + "step": 1517 + }, + { + "epoch": 0.42, + "logps_train/chosen": -68.64411163330078, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -134.52691650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.964411199092865, + "rewards_train/margins": 0.5882804989814758, + "rewards_train/rejected": -1.5526916980743408, + "step": 1517 + }, + { + "epoch": 0.42, + "learning_rate": 7.315048882410024e-07, + "loss": 0.5369, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -5.040703773498535, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -1.3828125, + "logps_train/rejected": -2.820563554763794, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.265007883310318, + "rewards_train/margins": -0.1212327778339386, + "rewards_train/rejected": -0.1437751054763794, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -86.96652221679688, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -89.82003784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5533477663993835, + "rewards_train/margins": 0.18535155057907104, + "rewards_train/rejected": 0.3679962158203125, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -6.600244998931885, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -10.946945190429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14752450585365295, + "rewards_train/margins": 0.4690450131893158, + "rewards_train/rejected": -0.6165695190429688, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -6.034381866455078, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -18.885896682739258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2253131866455078, + "rewards_train/margins": 1.3679640293121338, + "rewards_train/rejected": -1.5932772159576416, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -195.48553466796875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -254.62118530273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.448553562164307, + "rewards_train/margins": 2.5135655403137207, + "rewards_train/rejected": -9.962119102478027, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -54.729679107666016, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -127.44309997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6979678869247437, + "rewards_train/margins": 1.9963420629501343, + "rewards_train/rejected": -3.694309949874878, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -12.530937194824219, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -38.50126266479492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8499687314033508, + "rewards_train/margins": 1.8501576781272888, + "rewards_train/rejected": -2.7001264095306396, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -27.23157501220703, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -42.28196716308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.338782548904419, + "rewards_train/margins": 1.0081641674041748, + "rewards_train/rejected": -3.3469467163085938, + "step": 1518 + }, + { + "epoch": 0.42, + "logps_train/chosen": -100.5669937133789, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -199.26564025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2433006316423416, + "rewards_train/margins": 1.9698646813631058, + "rewards_train/rejected": -1.7265640497207642, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -18.80756187438965, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -3.6875, + "logps_train/rejected": -14.448888778686523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5948187112808228, + "rewards_train/margins": -0.5186798572540283, + "rewards_train/rejected": -1.0761388540267944, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -14.355119705200195, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -23.267990112304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9511370062828064, + "rewards_train/margins": 0.8194120526313782, + "rewards_train/rejected": -1.7705490589141846, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -101.22811889648438, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -154.18814086914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.622812032699585, + "rewards_train/margins": 1.746001958847046, + "rewards_train/rejected": -4.368813991546631, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -143.0929718017578, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -142.97593688964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.459297180175781, + "rewards_train/margins": -0.0117034912109375, + "rewards_train/rejected": -5.447593688964844, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -157.43756103515625, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -156.5933837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.043756008148193, + "rewards_train/margins": 0.41558265686035156, + "rewards_train/rejected": -6.459338665008545, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -97.69171142578125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -123.95814514160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4808288514614105, + "rewards_train/margins": 0.5266433656215668, + "rewards_train/rejected": -0.04581451416015625, + "step": 1519 + }, + { + "epoch": 0.42, + "logps_train/chosen": -16.49472999572754, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -30.672672271728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6557230353355408, + "rewards_train/margins": 1.5552943348884583, + "rewards_train/rejected": -2.211017370223999, + "step": 1519 + }, + { + "epoch": 0.42, + "learning_rate": 7.289574209855559e-07, + "loss": 0.3899, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -32.79108428955078, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -2.796875, + "logps_train/rejected": -21.366132736206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.779108464717865, + "rewards_train/margins": 1.0778173804283142, + "rewards_train/rejected": -1.8569258451461792, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -5.016537666320801, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -23.027301788330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09522123634815216, + "rewards_train/margins": 1.2792014628648758, + "rewards_train/rejected": -1.1839802265167236, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -22.156709671020508, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -31.26105499267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06567096710205078, + "rewards_train/margins": 2.0354344844818115, + "rewards_train/rejected": -2.1011054515838623, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -152.8297882080078, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -33.69645309448242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.682978868484497, + "rewards_train/margins": -0.46958351135253906, + "rewards_train/rejected": -2.213395357131958, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -54.12683868408203, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -70.73797607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3001840114593506, + "rewards_train/margins": 0.4236135482788086, + "rewards_train/rejected": -2.723797559738159, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -120.41461181640625, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -187.3732147216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3085388243198395, + "rewards_train/margins": 6.045860201120377, + "rewards_train/rejected": -5.737321376800537, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -10.578773498535156, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -12.248698234558105, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16412734985351562, + "rewards_train/margins": 0.7560549974441528, + "rewards_train/rejected": -0.9201823472976685, + "step": 1520 + }, + { + "epoch": 0.42, + "logps_train/chosen": -8.036686897277832, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -9.563013076782227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4224186837673187, + "rewards_train/margins": -0.14424237608909607, + "rewards_train/rejected": -0.27817630767822266, + "step": 1520 + }, + { + "epoch": 0.43, + "logps_train/chosen": -83.15730285644531, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -174.90122985839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7657302618026733, + "rewards_train/margins": 6.874393105506897, + "rewards_train/rejected": -8.64012336730957, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -97.61820983886719, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -245.3143310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9118210077285767, + "rewards_train/margins": 6.119612097740173, + "rewards_train/rejected": -7.03143310546875, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.094970703125, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -11.053620338439941, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.753247082233429, + "rewards_train/margins": -0.01194751262664795, + "rewards_train/rejected": -0.741299569606781, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -97.28170776367188, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -167.73876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7281707525253296, + "rewards_train/margins": 3.5957061052322388, + "rewards_train/rejected": -5.323876857757568, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -8.542497634887695, + "logps_train/ref_chosen": -2.078125, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -8.376729965209961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6464372873306274, + "rewards_train/margins": -0.3681392967700958, + "rewards_train/rejected": -0.2782979905605316, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -156.90980529785156, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -141.4602813720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.290980577468872, + "rewards_train/margins": 0.955047607421875, + "rewards_train/rejected": -2.246028184890747, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -1.320351243019104, + "logps_train/ref_chosen": -0.09521484375, + "logps_train/ref_rejected": -0.09521484375, + "logps_train/rejected": -1.2306265830993652, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12251364439725876, + "rewards_train/margins": -0.008972465991973877, + "rewards_train/rejected": -0.11354117840528488, + "step": 1521 + }, + { + "epoch": 0.43, + "logps_train/chosen": -143.4384765625, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -197.70480346679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2938477993011475, + "rewards_train/margins": 4.176632642745972, + "rewards_train/rejected": -6.470480442047119, + "step": 1521 + }, + { + "epoch": 0.43, + "learning_rate": 7.264118507407917e-07, + "loss": 0.3707, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -0.10469916462898254, + "logps_train/ref_chosen": -0.208984375, + "logps_train/ref_rejected": -0.208984375, + "logps_train/rejected": -0.11152410507202148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01042852085083723, + "rewards_train/margins": 0.0006824936717748642, + "rewards_train/rejected": 0.009746027179062366, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -95.16754150390625, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -98.607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.816754162311554, + "rewards_train/margins": 1.4939879775047302, + "rewards_train/rejected": -2.310742139816284, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -82.86180114746094, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -83.17321014404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1361801624298096, + "rewards_train/margins": 0.031140923500061035, + "rewards_train/rejected": -1.1673210859298706, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -155.86517333984375, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -157.906494140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.836517333984375, + "rewards_train/margins": -1.7458679676055908, + "rewards_train/rejected": -3.090649366378784, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -139.0271453857422, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -91.86620330810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.202714681625366, + "rewards_train/margins": 0.43390560150146484, + "rewards_train/rejected": -2.636620283126831, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -4.828621864318848, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -6.55686092376709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2265128195285797, + "rewards_train/margins": 0.6040739119052887, + "rewards_train/rejected": -0.377561092376709, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -17.40096664428711, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -24.62470245361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4807217121124268, + "rewards_train/margins": 0.3661235570907593, + "rewards_train/rejected": -1.846845269203186, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -139.55946350097656, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -228.12583923339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5559463500976562, + "rewards_train/margins": 5.656638145446777, + "rewards_train/rejected": -8.212584495544434, + "step": 1522 + }, + { + "epoch": 0.43, + "logps_train/chosen": -38.06285095214844, + "logps_train/ref_chosen": -30.625, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -87.89338684082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7437850832939148, + "rewards_train/margins": 0.7455536723136902, + "rewards_train/rejected": -1.489338755607605, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -114.18141174316406, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -245.2251434326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4181411862373352, + "rewards_train/margins": 6.204373061656952, + "rewards_train/rejected": -6.622514247894287, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -186.87435913085938, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -232.73226928710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.8874359130859375, + "rewards_train/margins": 2.7857913970947266, + "rewards_train/rejected": -8.673227310180664, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -121.54072570800781, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -140.98692321777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6540725231170654, + "rewards_train/margins": 0.94461989402771, + "rewards_train/rejected": -3.5986924171447754, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -91.14730834960938, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -79.2916259765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3647308349609375, + "rewards_train/margins": -0.9355682134628296, + "rewards_train/rejected": -1.429162621498108, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -14.44517707824707, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -23.4627685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.894517719745636, + "rewards_train/margins": 0.570509135723114, + "rewards_train/rejected": -1.46502685546875, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -149.75750732421875, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -194.56918334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3757507801055908, + "rewards_train/margins": 3.78116774559021, + "rewards_train/rejected": -5.156918525695801, + "step": 1523 + }, + { + "epoch": 0.43, + "logps_train/chosen": -136.52584838867188, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -249.67823791503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.002584934234619, + "rewards_train/margins": 11.315239429473877, + "rewards_train/rejected": -13.317824363708496, + "step": 1523 + }, + { + "epoch": 0.43, + "learning_rate": 7.238681953230035e-07, + "loss": 0.4666, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -106.92094421386719, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -210.39576721191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5920944213867188, + "rewards_train/margins": 6.447482109069824, + "rewards_train/rejected": -8.039576530456543, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -123.9847412109375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -191.78526306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.24847412109375, + "rewards_train/margins": 3.4800524711608887, + "rewards_train/rejected": -7.728526592254639, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -4.219619274139404, + "logps_train/ref_chosen": -1.21875, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -17.53781509399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30008694529533386, + "rewards_train/margins": 0.6161945760250092, + "rewards_train/rejected": -0.916281521320343, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -111.95074462890625, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -138.98184204101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.645074486732483, + "rewards_train/margins": 2.903109908103943, + "rewards_train/rejected": -4.548184394836426, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -2.471014976501465, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -1.03125, + "logps_train/rejected": -2.77797532081604, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14944525063037872, + "rewards_train/margins": 0.025227278470993042, + "rewards_train/rejected": -0.17467252910137177, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -67.19607543945312, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -65.77782440185547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.74460768699646, + "rewards_train/margins": -0.5918252468109131, + "rewards_train/rejected": -2.152782440185547, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -171.72787475585938, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -241.64378356933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.772787570953369, + "rewards_train/margins": 4.0915913581848145, + "rewards_train/rejected": -9.864378929138184, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -114.88143920898438, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -134.46275329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4881439208984375, + "rewards_train/margins": 1.0081313848495483, + "rewards_train/rejected": -1.4962753057479858, + "step": 1524 + }, + { + "epoch": 0.43, + "logps_train/chosen": -169.9444122314453, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -164.38934326171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5944411754608154, + "rewards_train/margins": -0.4555068016052246, + "rewards_train/rejected": -2.138934373855591, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -29.92757225036621, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -49.23863983154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.355257272720337, + "rewards_train/margins": 2.8686068058013916, + "rewards_train/rejected": -4.2238640785217285, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -40.97180938720703, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -34.38276672363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.059680938720703, + "rewards_train/margins": 0.647345781326294, + "rewards_train/rejected": -2.707026720046997, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -50.40279769897461, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -54.37379455566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.765279769897461, + "rewards_train/margins": -0.7029001712799072, + "rewards_train/rejected": -2.0623795986175537, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -131.83102416992188, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -141.24913024902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.1331024169921875, + "rewards_train/margins": 0.04181051254272461, + "rewards_train/rejected": -6.174912929534912, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -32.01029968261719, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -83.79707336425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6510300040245056, + "rewards_train/margins": 4.103677332401276, + "rewards_train/rejected": -4.754707336425781, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.44269561767578, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -39.04330825805664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8911445736885071, + "rewards_train/margins": 1.8006862998008728, + "rewards_train/rejected": -2.69183087348938, + "step": 1525 + }, + { + "epoch": 0.43, + "logps_train/chosen": -19.93867301940918, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -16.33942413330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3657423257827759, + "rewards_train/margins": -0.028674840927124023, + "rewards_train/rejected": -1.3370674848556519, + "step": 1525 + }, + { + "epoch": 0.43, + "learning_rate": 7.213264725350816e-07, + "loss": 0.4147, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -222.57247924804688, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -224.0, + "logps_train/rejected": -293.9732666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.957248210906982, + "rewards_train/margins": 0.04007863998413086, + "rewards_train/rejected": -6.997326850891113, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -43.67333221435547, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -29.73171043395996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.967333197593689, + "rewards_train/margins": -0.46291208267211914, + "rewards_train/rejected": -1.5044211149215698, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -36.292724609375, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -36.36552810668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.554272472858429, + "rewards_train/margins": 0.0072803497314453125, + "rewards_train/rejected": -0.5615528225898743, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -22.998323440551758, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -31.747634887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9060823321342468, + "rewards_train/margins": 1.4374311566352844, + "rewards_train/rejected": -2.3435134887695312, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -121.65009307861328, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -114.93229675292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8150092959403992, + "rewards_train/margins": 2.778220474720001, + "rewards_train/rejected": -3.5932297706604004, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -43.041969299316406, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -41.66253662109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0104470252990723, + "rewards_train/margins": -0.5691933631896973, + "rewards_train/rejected": -2.441253662109375, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -90.39945220947266, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -117.3892593383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4899452924728394, + "rewards_train/margins": 2.398980736732483, + "rewards_train/rejected": -3.8889260292053223, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -169.9551544189453, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -157.73524475097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4955155849456787, + "rewards_train/margins": 2.3280088901519775, + "rewards_train/rejected": -5.823524475097656, + "step": 1526 + }, + { + "epoch": 0.43, + "logps_train/chosen": -25.5401611328125, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -23.625680923461914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1040161848068237, + "rewards_train/margins": -0.4414480924606323, + "rewards_train/rejected": -0.6625680923461914, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -20.573408126831055, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -80.13863372802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.332340806722641, + "rewards_train/margins": 1.4565226137638092, + "rewards_train/rejected": -1.7888634204864502, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -67.4317626953125, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -133.10675048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.293176293373108, + "rewards_train/margins": 0.01749873161315918, + "rewards_train/rejected": -1.310675024986267, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -98.70974731445312, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -69.95696258544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.429025262594223, + "rewards_train/margins": 0.024721503257751465, + "rewards_train/rejected": 0.40430375933647156, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -110.50518798828125, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -158.9208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9505188465118408, + "rewards_train/margins": 2.941570997238159, + "rewards_train/rejected": -4.89208984375, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -88.9498291015625, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -56.19222640991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.29498291015625, + "rewards_train/margins": 0.07423973083496094, + "rewards_train/rejected": -3.369222640991211, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -224.5, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -247.34503173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3500001430511475, + "rewards_train/margins": 2.6845033168792725, + "rewards_train/rejected": -6.03450345993042, + "step": 1527 + }, + { + "epoch": 0.43, + "logps_train/chosen": -119.2855224609375, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -197.36390686035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22855225205421448, + "rewards_train/margins": 3.6078384816646576, + "rewards_train/rejected": -3.836390733718872, + "step": 1527 + }, + { + "epoch": 0.43, + "learning_rate": 7.18786700166391e-07, + "loss": 0.4437, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -17.411304473876953, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -17.97132682800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42863044142723083, + "rewards_train/margins": 0.6060022413730621, + "rewards_train/rejected": -1.034632682800293, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -19.16176986694336, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -19.58408546447754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6724269986152649, + "rewards_train/margins": 0.4547315239906311, + "rewards_train/rejected": -1.127158522605896, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -128.1407470703125, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -151.90724182128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.164074659347534, + "rewards_train/margins": 0.3766496181488037, + "rewards_train/rejected": -3.540724277496338, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -148.32000732421875, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -194.0148162841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.032000780105591, + "rewards_train/margins": 3.7694809436798096, + "rewards_train/rejected": -5.8014817237854, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -52.047874450683594, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -66.65882873535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.842287540435791, + "rewards_train/margins": -1.1264046430587769, + "rewards_train/rejected": -1.7158828973770142, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -37.16966247558594, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -33.792213439941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9544662237167358, + "rewards_train/margins": 0.8685051202774048, + "rewards_train/rejected": -2.8229713439941406, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -139.0320281982422, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -130.85606384277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7532029151916504, + "rewards_train/margins": 1.1324036121368408, + "rewards_train/rejected": -3.885606527328491, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -6.13616943359375, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -18.560306549072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21049194037914276, + "rewards_train/margins": 0.7205387502908707, + "rewards_train/rejected": -0.9310306906700134, + "step": 1528 + }, + { + "epoch": 0.43, + "logps_train/chosen": -8.31025505065918, + "logps_train/ref_chosen": -0.79296875, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -16.29557991027832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7517286539077759, + "rewards_train/margins": 0.29032933712005615, + "rewards_train/rejected": -1.042057991027832, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -25.74630355834961, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -53.11479187011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22463035583496094, + "rewards_train/margins": 0.43684881925582886, + "rewards_train/rejected": -0.6614791750907898, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -47.427696228027344, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -49.546268463134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5302696228027344, + "rewards_train/margins": 0.5493571758270264, + "rewards_train/rejected": -3.0796267986297607, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -207.50709533691406, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -207.89089965820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.750709533691406, + "rewards_train/margins": 2.338380813598633, + "rewards_train/rejected": -9.089090347290039, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -32.413543701171875, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -34.10808563232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0538543462753296, + "rewards_train/margins": 0.4819542169570923, + "rewards_train/rejected": -1.5358085632324219, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -224.50375366210938, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -203.83596801757812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.250375270843506, + "rewards_train/margins": -0.4667782783508301, + "rewards_train/rejected": -5.783596992492676, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -183.51544189453125, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -229.0, + "logps_train/rejected": -311.9171142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6515443325042725, + "rewards_train/margins": 4.640167474746704, + "rewards_train/rejected": -8.291711807250977, + "step": 1529 + }, + { + "epoch": 0.43, + "logps_train/chosen": -150.03060913085938, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -204.87356567382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.253060817718506, + "rewards_train/margins": 2.3342957496643066, + "rewards_train/rejected": -6.5873565673828125, + "step": 1529 + }, + { + "epoch": 0.43, + "learning_rate": 7.162488959926449e-07, + "loss": 0.4404, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -103.88141632080078, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -124.60751342773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1881415843963623, + "rewards_train/margins": 0.9226100444793701, + "rewards_train/rejected": -4.110751628875732, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -149.96424865722656, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -180.16468811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.3464250564575195, + "rewards_train/margins": 0.5700440406799316, + "rewards_train/rejected": -5.916469097137451, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -1.0931729078292847, + "logps_train/ref_chosen": -1.078125, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -7.114831924438477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00150479085277766, + "rewards_train/margins": 0.33029090159107, + "rewards_train/rejected": -0.33179569244384766, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -82.10147094726562, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -201.65684509277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.035147190093994, + "rewards_train/margins": 9.030537128448486, + "rewards_train/rejected": -11.06568431854248, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -151.57632446289062, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -196.02349853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3576323986053467, + "rewards_train/margins": 2.2447173595428467, + "rewards_train/rejected": -4.602349758148193, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -117.71878051757812, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -155.31192016601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7718780636787415, + "rewards_train/margins": 0.15931397676467896, + "rewards_train/rejected": -0.9311920404434204, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -8.789451599121094, + "logps_train/ref_chosen": -1.9140625, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -18.7938175201416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6875389218330383, + "rewards_train/margins": 0.28559285402297974, + "rewards_train/rejected": -0.9731317758560181, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -145.19122314453125, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -217.82354736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.119122266769409, + "rewards_train/margins": 4.1632325649261475, + "rewards_train/rejected": -7.282354831695557, + "step": 1530 + }, + { + "epoch": 0.43, + "logps_train/chosen": -218.9202423095703, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -212.73577880859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.092024326324463, + "rewards_train/margins": -0.9184463024139404, + "rewards_train/rejected": -3.1735780239105225, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -50.8531379699707, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -72.57594299316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26031380891799927, + "rewards_train/margins": 3.047280490398407, + "rewards_train/rejected": -3.3075942993164062, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -27.17688751220703, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -7.970737457275391, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0176887512207031, + "rewards_train/margins": -0.7549900114536285, + "rewards_train/rejected": -0.2626987397670746, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -61.16063690185547, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -101.73362731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1339363157749176, + "rewards_train/margins": 3.757299095392227, + "rewards_train/rejected": -3.6233627796173096, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -131.8995819091797, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -195.59864807128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6899582147598267, + "rewards_train/margins": 1.669906735420227, + "rewards_train/rejected": -3.3598649501800537, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -13.675396919250488, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -26.442468643188477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8737897276878357, + "rewards_train/margins": 0.15795713663101196, + "rewards_train/rejected": -1.0317468643188477, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -156.42715454101562, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -144.7210235595703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1427154541015625, + "rewards_train/margins": -0.4706130623817444, + "rewards_train/rejected": -0.6721023917198181, + "step": 1531 + }, + { + "epoch": 0.43, + "logps_train/chosen": -132.667724609375, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -165.36219787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.016772508621216, + "rewards_train/margins": 2.919447183609009, + "rewards_train/rejected": -5.936219692230225, + "step": 1531 + }, + { + "epoch": 0.43, + "learning_rate": 7.137130777757827e-07, + "loss": 0.4302, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -56.5156364440918, + "logps_train/ref_chosen": -30.25, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -93.54110717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.626563787460327, + "rewards_train/margins": 3.2275469303131104, + "rewards_train/rejected": -5.8541107177734375, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -91.95280456542969, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -168.81874084472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3452804684638977, + "rewards_train/margins": 5.68659371137619, + "rewards_train/rejected": -6.031874179840088, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -118.08758544921875, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -52.004730224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.408758521080017, + "rewards_train/margins": 2.6042147874832153, + "rewards_train/rejected": -4.012973308563232, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -29.26834487915039, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -31.918472290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7643345594406128, + "rewards_train/margins": 0.15876269340515137, + "rewards_train/rejected": -1.9230972528457642, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.428796768188477, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -15.604777336120605, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1960047483444214, + "rewards_train/margins": -0.548026978969574, + "rewards_train/rejected": -0.6479777693748474, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -10.794245719909668, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -14.69489574432373, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5544245839118958, + "rewards_train/margins": 0.33069002628326416, + "rewards_train/rejected": -0.8851146101951599, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -2.4283719062805176, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -3.78125, + "logps_train/rejected": -7.795741081237793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08658719062805176, + "rewards_train/margins": 0.314861923456192, + "rewards_train/rejected": -0.4014491140842438, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -3.9173107147216797, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -17.02120590209961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14485608041286469, + "rewards_train/margins": 0.9572645574808121, + "rewards_train/rejected": -1.1021206378936768, + "step": 1532 + }, + { + "epoch": 0.43, + "logps_train/chosen": -146.85931396484375, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -135.9687957763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.735931396484375, + "rewards_train/margins": 0.7109484672546387, + "rewards_train/rejected": -4.446879863739014, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -48.097503662109375, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -43.79975128173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2472503185272217, + "rewards_train/margins": 0.6827249526977539, + "rewards_train/rejected": -3.9299752712249756, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -162.86041259765625, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -243.3408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4860413074493408, + "rewards_train/margins": 6.148041009902954, + "rewards_train/rejected": -7.634082317352295, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -9.01766300201416, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -13.297332763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.370516300201416, + "rewards_train/margins": 0.1904670000076294, + "rewards_train/rejected": -0.5609833002090454, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -157.28787231445312, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -272.93634033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.378787517547607, + "rewards_train/margins": 5.614846706390381, + "rewards_train/rejected": -11.993634223937988, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -218.291015625, + "logps_train/ref_chosen": -195.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -253.55221557617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3291015625, + "rewards_train/margins": 3.2261199951171875, + "rewards_train/rejected": -5.5552215576171875, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -125.143798828125, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -184.18753051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5143799781799316, + "rewards_train/margins": 4.754373073577881, + "rewards_train/rejected": -7.2687530517578125, + "step": 1533 + }, + { + "epoch": 0.43, + "logps_train/chosen": -47.63239288330078, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -94.57107543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06323929131031036, + "rewards_train/margins": 1.4438683241605759, + "rewards_train/rejected": -1.5071076154708862, + "step": 1533 + }, + { + "epoch": 0.43, + "learning_rate": 7.111792632638432e-07, + "loss": 0.3016, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -131.84339904785156, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -142.82891845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8843399286270142, + "rewards_train/margins": 3.4985522031784058, + "rewards_train/rejected": -5.38289213180542, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -13.583819389343262, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -9.33150863647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18963193893432617, + "rewards_train/margins": 0.35445642471313477, + "rewards_train/rejected": -0.5440883636474609, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -127.5087890625, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -216.37600708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0508790016174316, + "rewards_train/margins": 2.486721992492676, + "rewards_train/rejected": -5.537600994110107, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -0.7801358699798584, + "logps_train/ref_chosen": -0.890625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -20.718799591064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01104891300201416, + "rewards_train/margins": 0.9204288721084595, + "rewards_train/rejected": -0.9093799591064453, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -134.86541748046875, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -208.78704833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1365418434143066, + "rewards_train/margins": 5.8421630859375, + "rewards_train/rejected": -7.978704929351807, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -137.80992126464844, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -251.57835388183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.980992078781128, + "rewards_train/margins": 5.776843309402466, + "rewards_train/rejected": -9.757835388183594, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -77.32524871826172, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -72.98777770996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6825249195098877, + "rewards_train/margins": 0.0662529468536377, + "rewards_train/rejected": -3.7487778663635254, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -7.639812469482422, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -8.97504997253418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4389812648296356, + "rewards_train/margins": 0.23508623242378235, + "rewards_train/rejected": -0.674067497253418, + "step": 1534 + }, + { + "epoch": 0.43, + "logps_train/chosen": -125.00235748291016, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -220.0, + "logps_train/rejected": -228.68130493164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4502357244491577, + "rewards_train/margins": -0.5821052193641663, + "rewards_train/rejected": -0.8681305050849915, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -166.89767456054688, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -174.8909912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2897675037384033, + "rewards_train/margins": 2.0493319034576416, + "rewards_train/rejected": -5.339099407196045, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -5.796867847442627, + "logps_train/ref_chosen": -1.5859375, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -27.120025634765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.42109304666519165, + "rewards_train/margins": -0.1840904802083969, + "rewards_train/rejected": -0.23700256645679474, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -17.052494049072266, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -12.36850357055664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7364994287490845, + "rewards_train/margins": -0.8246490731835365, + "rewards_train/rejected": 0.08814964443445206, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -134.14004516601562, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -162.55010986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7640044689178467, + "rewards_train/margins": 1.7910068035125732, + "rewards_train/rejected": -4.55501127243042, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -117.85618591308594, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -236.297119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.535618543624878, + "rewards_train/margins": 7.094093561172485, + "rewards_train/rejected": -9.629712104797363, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -6.1058573722839355, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -12.76140022277832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.45511698722839355, + "rewards_train/margins": -0.21022696793079376, + "rewards_train/rejected": -0.2448900192975998, + "step": 1535 + }, + { + "epoch": 0.43, + "logps_train/chosen": -174.95681762695312, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -207.8708038330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.095681667327881, + "rewards_train/margins": 0.49139881134033203, + "rewards_train/rejected": -4.587080478668213, + "step": 1535 + }, + { + "epoch": 0.43, + "learning_rate": 7.086474701908413e-07, + "loss": 0.424, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -83.72273254394531, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -81.348388671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.2277267426252365, + "rewards_train/margins": -0.2874344140291214, + "rewards_train/rejected": 0.5151611566543579, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -145.373291015625, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -229.76898193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4873292446136475, + "rewards_train/margins": 6.839569330215454, + "rewards_train/rejected": -10.326898574829102, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -162.66358947753906, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -195.06919860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4663589000701904, + "rewards_train/margins": 3.7905609607696533, + "rewards_train/rejected": -7.256919860839844, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -3.4638476371765137, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -6.8608479499816895, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17997851967811584, + "rewards_train/margins": -0.0282687246799469, + "rewards_train/rejected": -0.15170979499816895, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -176.12010192871094, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -179.19473266601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.912010192871094, + "rewards_train/margins": 1.457463264465332, + "rewards_train/rejected": -6.369473457336426, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -80.17567443847656, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -134.53549194335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03243255615234375, + "rewards_train/margins": 3.9359817504882812, + "rewards_train/rejected": -3.9035491943359375, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -226.29763793945312, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -215.09959411621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.129764080047607, + "rewards_train/margins": 4.180195331573486, + "rewards_train/rejected": -10.309959411621094, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -190.1295623779297, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -179.99227905273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.8129563331604, + "rewards_train/margins": -0.6137285232543945, + "rewards_train/rejected": -6.199227809906006, + "step": 1536 + }, + { + "epoch": 0.43, + "logps_train/chosen": -40.795223236083984, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -77.35352325439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4045222997665405, + "rewards_train/margins": 2.630829930305481, + "rewards_train/rejected": -4.0353522300720215, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -35.80928039550781, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -33.31562805175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4184281826019287, + "rewards_train/margins": 0.322509765625, + "rewards_train/rejected": -2.7409379482269287, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -141.17959594726562, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -125.42469787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8179596066474915, + "rewards_train/margins": 1.9245101809501648, + "rewards_train/rejected": -2.7424697875976562, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -128.88421630859375, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -105.30471801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1884217262268066, + "rewards_train/margins": 0.04205012321472168, + "rewards_train/rejected": -2.2304718494415283, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -119.20109558105469, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -182.1118927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4701095521450043, + "rewards_train/margins": 7.29107990860939, + "rewards_train/rejected": -7.7611894607543945, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -37.736175537109375, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -37.64539337158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.198617696762085, + "rewards_train/margins": 0.10342168807983398, + "rewards_train/rejected": -3.302039384841919, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -119.07058715820312, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -118.59082794189453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8070586919784546, + "rewards_train/margins": -0.047975897789001465, + "rewards_train/rejected": -1.7590827941894531, + "step": 1537 + }, + { + "epoch": 0.43, + "logps_train/chosen": -67.0818099975586, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -49.91265869140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.533181190490723, + "rewards_train/margins": -0.2950401306152344, + "rewards_train/rejected": -4.238141059875488, + "step": 1537 + }, + { + "epoch": 0.43, + "learning_rate": 7.061177162766436e-07, + "loss": 0.4064, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -160.94540405273438, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -223.17495727539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.144540309906006, + "rewards_train/margins": 3.872955799102783, + "rewards_train/rejected": -9.017496109008789, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -9.694635391235352, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -20.032814025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043036460876464844, + "rewards_train/margins": 1.3306928873062134, + "rewards_train/rejected": -1.2876564264297485, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -80.29366302490234, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -69.34595489501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8543663024902344, + "rewards_train/margins": 1.0677292346954346, + "rewards_train/rejected": -3.922095537185669, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -45.86833190917969, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -53.53990936279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.736833333969116, + "rewards_train/margins": 0.6421575546264648, + "rewards_train/rejected": -3.378990888595581, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -121.90547943115234, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -144.14700317382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.340548038482666, + "rewards_train/margins": 0.2241523265838623, + "rewards_train/rejected": -3.5647003650665283, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -44.318511962890625, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -48.413902282714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1181488037109375, + "rewards_train/margins": 0.9345390200614929, + "rewards_train/rejected": -0.8163902163505554, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -8.135250091552734, + "logps_train/ref_chosen": -1.53125, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -31.922950744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6604000329971313, + "rewards_train/margins": 0.6568950414657593, + "rewards_train/rejected": -1.3172950744628906, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -32.75143051147461, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -18.816862106323242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1688930988311768, + "rewards_train/margins": -0.5856443643569946, + "rewards_train/rejected": -1.5832487344741821, + "step": 1538 + }, + { + "epoch": 0.43, + "logps_train/chosen": -23.524364471435547, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -0.77734375, + "logps_train/rejected": -15.756451606750488, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9649364948272705, + "rewards_train/margins": -0.46702563762664795, + "rewards_train/rejected": -1.4979108572006226, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -72.38967895507812, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -35.082984924316406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2389678955078125, + "rewards_train/margins": -0.555669367313385, + "rewards_train/rejected": -0.6832985281944275, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -13.37767505645752, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -19.483997344970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49401751160621643, + "rewards_train/margins": 0.016882210969924927, + "rewards_train/rejected": -0.5108997225761414, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -150.19384765625, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -173.00167846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.619384765625, + "rewards_train/margins": 2.480782985687256, + "rewards_train/rejected": -4.100167751312256, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -193.71629333496094, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -186.235595703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.971629619598389, + "rewards_train/margins": -0.1480698585510254, + "rewards_train/rejected": -4.823559761047363, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.689538955688477, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -37.57332229614258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5689539313316345, + "rewards_train/margins": 2.886815845966339, + "rewards_train/rejected": -3.4557697772979736, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -24.91268539428711, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -60.9071044921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.841268539428711, + "rewards_train/margins": -0.7255580425262451, + "rewards_train/rejected": -1.1157104969024658, + "step": 1539 + }, + { + "epoch": 0.43, + "logps_train/chosen": -43.99108123779297, + "logps_train/ref_chosen": -27.25, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -106.70513153076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6741081476211548, + "rewards_train/margins": 1.8964051008224487, + "rewards_train/rejected": -3.5705132484436035, + "step": 1539 + }, + { + "epoch": 0.43, + "learning_rate": 7.035900192268464e-07, + "loss": 0.5095, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -29.95631980895996, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -42.52838134765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.508131980895996, + "rewards_train/margins": 1.1322062015533447, + "rewards_train/rejected": -2.640338182449341, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -1.4197230339050293, + "logps_train/ref_chosen": -1.015625, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -33.119300842285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04040980339050293, + "rewards_train/margins": 0.7840203046798706, + "rewards_train/rejected": -0.8244301080703735, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -128.93893432617188, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -130.72792053222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.543893575668335, + "rewards_train/margins": 2.3788983821868896, + "rewards_train/rejected": -4.922791957855225, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -11.173287391662598, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -12.367557525634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20482873916625977, + "rewards_train/margins": 0.11942702531814575, + "rewards_train/rejected": -0.3242557644844055, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.034521102905273, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -20.315872192382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1534521579742432, + "rewards_train/margins": 0.0718851089477539, + "rewards_train/rejected": -1.225337266921997, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -171.91285705566406, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -208.0, + "logps_train/rejected": -266.3812561035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.291285991668701, + "rewards_train/margins": 1.546839714050293, + "rewards_train/rejected": -5.838125705718994, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -72.78938293457031, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -75.4374771118164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2710617184638977, + "rewards_train/margins": 0.46480943262577057, + "rewards_train/rejected": -0.19374771416187286, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -35.88030242919922, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -40.096256256103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1505303382873535, + "rewards_train/margins": 0.9903452396392822, + "rewards_train/rejected": -3.1408755779266357, + "step": 1540 + }, + { + "epoch": 0.43, + "logps_train/chosen": -103.2605209350586, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -210.3328857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9760520458221436, + "rewards_train/margins": 6.00723671913147, + "rewards_train/rejected": -8.983288764953613, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -6.7492241859436035, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -25.24692726135254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3858599364757538, + "rewards_train/margins": 0.8638328611850739, + "rewards_train/rejected": -1.2496927976608276, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -7.847610950469971, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -10.374317169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41601109504699707, + "rewards_train/margins": 0.2792331576347351, + "rewards_train/rejected": -0.6952442526817322, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -160.27911376953125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -239.1839141845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.327911376953125, + "rewards_train/margins": 2.4904799461364746, + "rewards_train/rejected": -4.8183913230896, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -193.58363342285156, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -285.43902587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4583632946014404, + "rewards_train/margins": 9.385539293289185, + "rewards_train/rejected": -12.843902587890625, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.893352508544922, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -29.716426849365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8330852389335632, + "rewards_train/margins": 0.6323074698448181, + "rewards_train/rejected": -1.4653927087783813, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -125.30231475830078, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -182.78488159179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.280231475830078, + "rewards_train/margins": 3.6982569694519043, + "rewards_train/rejected": -7.978488445281982, + "step": 1541 + }, + { + "epoch": 0.43, + "logps_train/chosen": -139.51353454589844, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -191.10752868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2013535499572754, + "rewards_train/margins": 5.209399223327637, + "rewards_train/rejected": -7.410752773284912, + "step": 1541 + }, + { + "epoch": 0.43, + "learning_rate": 7.010643967326487e-07, + "loss": 0.2805, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -178.19717407226562, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -313.283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.919717311859131, + "rewards_train/margins": 7.308602809906006, + "rewards_train/rejected": -15.228320121765137, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -153.50416564941406, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -152.76260375976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.150416851043701, + "rewards_train/margins": 0.07584381103515625, + "rewards_train/rejected": -4.226260662078857, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -180.17034912109375, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -181.07427978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.467034816741943, + "rewards_train/margins": 2.290393352508545, + "rewards_train/rejected": -7.757428169250488, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -23.265148162841797, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -45.38443374633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5327647924423218, + "rewards_train/margins": 1.0556787252426147, + "rewards_train/rejected": -2.5884435176849365, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -117.42613220214844, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -151.336669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5926132202148438, + "rewards_train/margins": 2.5410537719726562, + "rewards_train/rejected": -5.1336669921875, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -212.45535278320312, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -203.41650390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.945535182952881, + "rewards_train/margins": -1.303884506225586, + "rewards_train/rejected": -5.641650676727295, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -108.03231811523438, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -80.2289810180664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6032318472862244, + "rewards_train/margins": -0.5303337424993515, + "rewards_train/rejected": -0.07289810478687286, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -69.12167358398438, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -44.734527587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.762167453765869, + "rewards_train/margins": 0.8362853527069092, + "rewards_train/rejected": -3.5984528064727783, + "step": 1542 + }, + { + "epoch": 0.43, + "logps_train/chosen": -0.03124849870800972, + "logps_train/ref_chosen": -0.10888671875, + "logps_train/ref_rejected": -0.10888671875, + "logps_train/rejected": -0.031224530190229416, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007763822562992573, + "rewards_train/margins": -2.3958273231983185e-06, + "rewards_train/rejected": 0.007766218390315771, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -202.6862335205078, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -216.0833740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2686233520507812, + "rewards_train/margins": 2.439713954925537, + "rewards_train/rejected": -5.708337306976318, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -58.4136962890625, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -78.29085540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.066369652748108, + "rewards_train/margins": 0.8877159357070923, + "rewards_train/rejected": -1.9540855884552002, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -140.51988220214844, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -189.5283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8519882559776306, + "rewards_train/margins": 3.3008437752723694, + "rewards_train/rejected": -4.15283203125, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -46.957664489746094, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -20.675212860107422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3207664489746094, + "rewards_train/margins": -0.6907451152801514, + "rewards_train/rejected": -1.630021333694458, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -126.6207504272461, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -111.67352294921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.762075185775757, + "rewards_train/margins": -0.494722843170166, + "rewards_train/rejected": -3.267352342605591, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -121.08770751953125, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -150.90768432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6087708473205566, + "rewards_train/margins": 1.6319975852966309, + "rewards_train/rejected": -5.2407684326171875, + "step": 1543 + }, + { + "epoch": 0.43, + "logps_train/chosen": -6.664741039276123, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -4.6490159034729, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.005400896072387695, + "rewards_train/margins": -0.13282251358032227, + "rewards_train/rejected": 0.13822340965270996, + "step": 1543 + }, + { + "epoch": 0.43, + "learning_rate": 6.9854086647073e-07, + "loss": 0.5119, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -129.97674560546875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -170.63919067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.397674560546875, + "rewards_train/margins": 1.1662445068359375, + "rewards_train/rejected": -4.5639190673828125, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -52.37652587890625, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -81.82804870605469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.062652587890625, + "rewards_train/margins": -0.20484769344329834, + "rewards_train/rejected": -1.8578048944473267, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -4.313663482666016, + "logps_train/ref_chosen": -0.92578125, + "logps_train/ref_rejected": -0.92578125, + "logps_train/rejected": -4.3134918212890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.338788241147995, + "rewards_train/margins": -1.71661376953125e-05, + "rewards_train/rejected": -0.3387710750102997, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -166.92633056640625, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -121.976806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4926331043243408, + "rewards_train/margins": 0.6550476551055908, + "rewards_train/rejected": -2.1476807594299316, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -169.25003051757812, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -136.84414672851562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.525002956390381, + "rewards_train/margins": -1.5905883312225342, + "rewards_train/rejected": -3.9344146251678467, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -9.485797882080078, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -9.922628402709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0889202132821083, + "rewards_train/margins": 0.37493307143449783, + "rewards_train/rejected": -0.2860128581523895, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -17.034154891967773, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -42.19449234008789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0159155130386353, + "rewards_train/margins": 1.1535338163375854, + "rewards_train/rejected": -2.1694493293762207, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -21.80950355529785, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -28.978404998779297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7559503316879272, + "rewards_train/margins": -0.37060976028442383, + "rewards_train/rejected": -1.3853405714035034, + "step": 1544 + }, + { + "epoch": 0.43, + "logps_train/chosen": -66.50094604492188, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -169.61996459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0000946521759033, + "rewards_train/margins": 2.76190185546875, + "rewards_train/rejected": -3.7619965076446533, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -151.16778564453125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -192.1186065673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.916778564453125, + "rewards_train/margins": 2.8950819969177246, + "rewards_train/rejected": -5.81186056137085, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -4.69468355178833, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -4.690402984619141, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03365664556622505, + "rewards_train/margins": 0.13863444700837135, + "rewards_train/rejected": -0.1049778014421463, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -9.367752075195312, + "logps_train/ref_chosen": -3.453125, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -16.993324279785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5914627313613892, + "rewards_train/margins": 0.8219321966171265, + "rewards_train/rejected": -1.4133949279785156, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -174.25039672851562, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -235.49314880371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.425039768218994, + "rewards_train/margins": 0.12427520751953125, + "rewards_train/rejected": -4.549314975738525, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -16.83954620361328, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -13.269726753234863, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9527046084403992, + "rewards_train/margins": 0.008643090724945068, + "rewards_train/rejected": -0.9613476991653442, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -38.2260856628418, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -80.46781921386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9976086020469666, + "rewards_train/margins": 0.1491733193397522, + "rewards_train/rejected": -1.1467819213867188, + "step": 1545 + }, + { + "epoch": 0.43, + "logps_train/chosen": -84.33600616455078, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -89.8977279663086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.633600652217865, + "rewards_train/margins": -0.09382784366607666, + "rewards_train/rejected": -0.5397728085517883, + "step": 1545 + }, + { + "epoch": 0.43, + "learning_rate": 6.960194461031264e-07, + "loss": 0.5902, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -190.93930053710938, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -189.14862060546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.2939300537109375, + "rewards_train/margins": -0.6790680885314941, + "rewards_train/rejected": -5.614861965179443, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -132.12962341308594, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -201.90997314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1629624366760254, + "rewards_train/margins": 3.428034782409668, + "rewards_train/rejected": -5.590997219085693, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -84.6668701171875, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -94.81770324707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7666870355606079, + "rewards_train/margins": 0.9650833606719971, + "rewards_train/rejected": -1.731770396232605, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -137.73228454589844, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -147.6343994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1732285022735596, + "rewards_train/margins": 1.640211582183838, + "rewards_train/rejected": -3.8134400844573975, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -90.76808166503906, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -77.35443115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.176808163523674, + "rewards_train/margins": 4.696135237812996, + "rewards_train/rejected": -4.87294340133667, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -147.87985229492188, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -241.9044952392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9879852533340454, + "rewards_train/margins": 6.8024643659591675, + "rewards_train/rejected": -7.790449619293213, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -24.427919387817383, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -11.444284439086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43029195070266724, + "rewards_train/margins": 0.23601150512695312, + "rewards_train/rejected": -0.6663034558296204, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -172.59896850585938, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -191.3526611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.209897041320801, + "rewards_train/margins": 1.4253692626953125, + "rewards_train/rejected": -8.635266304016113, + "step": 1546 + }, + { + "epoch": 0.43, + "logps_train/chosen": -153.2147216796875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -162.31134033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2214722633361816, + "rewards_train/margins": 1.5096616744995117, + "rewards_train/rejected": -4.731133937835693, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -148.6856231689453, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -201.29489135742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.968562364578247, + "rewards_train/margins": 2.560926675796509, + "rewards_train/rejected": -5.529489040374756, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -26.95012092590332, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -39.7038459777832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6450120806694031, + "rewards_train/margins": 1.1753725409507751, + "rewards_train/rejected": -1.8203846216201782, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -10.788175582885742, + "logps_train/ref_chosen": -1.546875, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -28.114437103271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9241300821304321, + "rewards_train/margins": 0.9498136043548584, + "rewards_train/rejected": -1.8739436864852905, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -183.00558471679688, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -206.7000732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.300558567047119, + "rewards_train/margins": 2.8694491386413574, + "rewards_train/rejected": -9.170007705688477, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -160.35903930664062, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -200.79071044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7359039783477783, + "rewards_train/margins": 5.093167066574097, + "rewards_train/rejected": -7.829071044921875, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -7.711847305297852, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -9.616116523742676, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13680973649024963, + "rewards_train/margins": 0.5404269397258759, + "rewards_train/rejected": -0.6772366762161255, + "step": 1547 + }, + { + "epoch": 0.43, + "logps_train/chosen": -122.56492614746094, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -110.00408935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2935073971748352, + "rewards_train/margins": 1.9939163327217102, + "rewards_train/rejected": -1.700408935546875, + "step": 1547 + }, + { + "epoch": 0.43, + "learning_rate": 6.935001532771078e-07, + "loss": 0.2466, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -8.15223503112793, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -40.916053771972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.574598491191864, + "rewards_train/margins": 2.510757029056549, + "rewards_train/rejected": -3.085355520248413, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -136.43023681640625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -209.4333953857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.743023693561554, + "rewards_train/margins": 4.500315845012665, + "rewards_train/rejected": -5.243339538574219, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -116.82752990722656, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -183.95297241210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3827531337738037, + "rewards_train/margins": 1.3125441074371338, + "rewards_train/rejected": -3.6952972412109375, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -31.382997512817383, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -26.965312957763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1757997274398804, + "rewards_train/margins": 0.35198163986206055, + "rewards_train/rejected": -1.527781367301941, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -9.247719764709473, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -9.733010292053223, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7138344645500183, + "rewards_train/margins": 0.032904088497161865, + "rewards_train/rejected": -0.7467385530471802, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -131.3694305419922, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -113.91940307617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8869431018829346, + "rewards_train/margins": 0.10499727725982666, + "rewards_train/rejected": -1.9919403791427612, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -3.437670946121216, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -16.88304901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13282959163188934, + "rewards_train/margins": 1.2054752856492996, + "rewards_train/rejected": -1.338304877281189, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -190.80087280273438, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -237.93179321289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.9800872802734375, + "rewards_train/margins": 3.6130924224853516, + "rewards_train/rejected": -10.593179702758789, + "step": 1548 + }, + { + "epoch": 0.43, + "logps_train/chosen": -104.10256958007812, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -237.20338439941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3102569580078125, + "rewards_train/margins": 9.610081672668457, + "rewards_train/rejected": -10.92033863067627, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -18.26044464111328, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -41.051116943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1385444700717926, + "rewards_train/margins": 3.3603173196315765, + "rewards_train/rejected": -3.498861789703369, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -7.105321884155273, + "logps_train/ref_chosen": -0.921875, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -29.5039119720459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6183447241783142, + "rewards_train/margins": 1.1132964491844177, + "rewards_train/rejected": -1.731641173362732, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -128.5330810546875, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -161.50018310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.803308010101318, + "rewards_train/margins": 0.8967103958129883, + "rewards_train/rejected": -5.700018405914307, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -91.60371398925781, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -109.26809692382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.760371446609497, + "rewards_train/margins": 1.616438388824463, + "rewards_train/rejected": -3.37680983543396, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -82.20671844482422, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -40.333534240722656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.270671844482422, + "rewards_train/margins": -0.42481839656829834, + "rewards_train/rejected": -1.8458534479141235, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -21.183061599731445, + "logps_train/ref_chosen": -6.71875, + "logps_train/ref_rejected": -1.7421875, + "logps_train/rejected": -22.130393981933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4464311599731445, + "rewards_train/margins": 0.5923895835876465, + "rewards_train/rejected": -2.038820743560791, + "step": 1549 + }, + { + "epoch": 0.43, + "logps_train/chosen": -19.47865104675293, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -15.35629653930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.285365104675293, + "rewards_train/margins": 0.08542084693908691, + "rewards_train/rejected": -1.3707859516143799, + "step": 1549 + }, + { + "epoch": 0.43, + "learning_rate": 6.909830056250526e-07, + "loss": 0.3335, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -145.91615295410156, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -205.7466583251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.941615343093872, + "rewards_train/margins": 4.9330503940582275, + "rewards_train/rejected": -7.8746657371521, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -157.02154541015625, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -237.5897216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.452154636383057, + "rewards_train/margins": 3.3068175315856934, + "rewards_train/rejected": -9.75897216796875, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -29.996089935302734, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -19.35773468017578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9121090173721313, + "rewards_train/margins": -1.2575855255126953, + "rewards_train/rejected": -0.654523491859436, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -187.37733459472656, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -200.79129028320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.537733554840088, + "rewards_train/margins": 1.6413955688476562, + "rewards_train/rejected": -6.179129123687744, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -22.764949798583984, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -8.640381813049316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3264950513839722, + "rewards_train/margins": -0.8530818521976471, + "rewards_train/rejected": -0.4734131991863251, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -57.526153564453125, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -245.6438446044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9026153683662415, + "rewards_train/margins": 11.361768901348114, + "rewards_train/rejected": -12.264384269714355, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -0.14027899503707886, + "logps_train/ref_chosen": -0.380859375, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -7.785449028015137, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024058038368821144, + "rewards_train/margins": 0.09947794117033482, + "rewards_train/rejected": -0.07541990280151367, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -29.003618240356445, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -36.840248107910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.044111967086792, + "rewards_train/margins": 0.8149127960205078, + "rewards_train/rejected": -2.8590247631073, + "step": 1550 + }, + { + "epoch": 0.43, + "logps_train/chosen": -173.7373046875, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -280.3189697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.173730373382568, + "rewards_train/margins": 9.058166980743408, + "rewards_train/rejected": -15.231897354125977, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -11.246514320373535, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -15.954366683959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5871514678001404, + "rewards_train/margins": 0.5082852244377136, + "rewards_train/rejected": -1.095436692237854, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -10.390789985656738, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -0.9765625, + "logps_train/rejected": -10.373929977416992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9414227604866028, + "rewards_train/margins": -0.0016859769821166992, + "rewards_train/rejected": -0.9397367835044861, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -157.23416137695312, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -220.27883911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.9234161376953125, + "rewards_train/margins": 3.7044677734375, + "rewards_train/rejected": -9.627883911132812, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -23.650876998901367, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -46.691200256347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8619626760482788, + "rewards_train/margins": 0.344657301902771, + "rewards_train/rejected": -2.20661997795105, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -92.96171569824219, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -89.13467407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0461715459823608, + "rewards_train/margins": 1.9172958135604858, + "rewards_train/rejected": -2.9634673595428467, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -15.91382884979248, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -38.206642150878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9851328730583191, + "rewards_train/margins": 1.3980312943458557, + "rewards_train/rejected": -2.383164167404175, + "step": 1551 + }, + { + "epoch": 0.43, + "logps_train/chosen": -6.3232550621032715, + "logps_train/ref_chosen": -0.78125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -9.168001174926758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5542005300521851, + "rewards_train/margins": -0.19990041851997375, + "rewards_train/rejected": -0.3543001115322113, + "step": 1551 + }, + { + "epoch": 0.43, + "learning_rate": 6.884680207643257e-07, + "loss": 0.4269, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -19.560794830322266, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -28.866714477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5373294949531555, + "rewards_train/margins": 1.6962170004844666, + "rewards_train/rejected": -2.233546495437622, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -21.682720184326172, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -42.506160736083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.243272066116333, + "rewards_train/margins": 2.3010940551757812, + "rewards_train/rejected": -3.5443661212921143, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -44.71129608154297, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -64.22232055664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.608629584312439, + "rewards_train/margins": 2.5386024713516235, + "rewards_train/rejected": -4.1472320556640625, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -20.312850952148438, + "logps_train/ref_chosen": -1.5078125, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -12.502105712890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8805038928985596, + "rewards_train/margins": -1.4240433275699615, + "rewards_train/rejected": -0.456460565328598, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -13.278712272644043, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -62.36688995361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9747462272644043, + "rewards_train/margins": 0.9619427919387817, + "rewards_train/rejected": -1.936689019203186, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -134.02658081054688, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -233.75482177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4026581048965454, + "rewards_train/margins": 2.972823977470398, + "rewards_train/rejected": -4.375482082366943, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -103.49172973632812, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -78.90814208984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6491730213165283, + "rewards_train/margins": -1.8083587884902954, + "rewards_train/rejected": -0.8408142328262329, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -32.22031784057617, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -34.38460922241211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3595317602157593, + "rewards_train/margins": 0.35392916202545166, + "rewards_train/rejected": -1.713460922241211, + "step": 1552 + }, + { + "epoch": 0.43, + "logps_train/chosen": -183.6622772216797, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -199.63819885253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.866227865219116, + "rewards_train/margins": 0.09759211540222168, + "rewards_train/rejected": -3.963819980621338, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -137.216064453125, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -183.3579864501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.571606397628784, + "rewards_train/margins": 3.2641923427581787, + "rewards_train/rejected": -6.835798740386963, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -106.33460998535156, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -159.31069946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.93346107006073, + "rewards_train/margins": 3.6476088762283325, + "rewards_train/rejected": -5.5810699462890625, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -41.40290832519531, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -21.343482971191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8777908086776733, + "rewards_train/margins": -0.057505011558532715, + "rewards_train/rejected": -1.8202857971191406, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -26.039426803588867, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -42.134464263916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9914426803588867, + "rewards_train/margins": 2.1657538414001465, + "rewards_train/rejected": -3.157196521759033, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -4.841372489929199, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -27.553308486938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2528872489929199, + "rewards_train/margins": 1.6086935997009277, + "rewards_train/rejected": -1.8615808486938477, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -75.11769104003906, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -75.38874816894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.786769151687622, + "rewards_train/margins": 3.1646058559417725, + "rewards_train/rejected": -4.9513750076293945, + "step": 1553 + }, + { + "epoch": 0.43, + "logps_train/chosen": -11.44403076171875, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -17.569866180419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8506531119346619, + "rewards_train/margins": 0.3282085061073303, + "rewards_train/rejected": -1.1788616180419922, + "step": 1553 + }, + { + "epoch": 0.43, + "learning_rate": 6.85955216297154e-07, + "loss": 0.447, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -30.47640609741211, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -32.32133865356445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0726406574249268, + "rewards_train/margins": 0.696993350982666, + "rewards_train/rejected": -2.7696340084075928, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -155.70751953125, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -232.10110473632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.570751905441284, + "rewards_train/margins": 8.189358949661255, + "rewards_train/rejected": -11.760110855102539, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -19.414819717407227, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -18.582721710205078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7289819717407227, + "rewards_train/margins": -0.0207098126411438, + "rewards_train/rejected": -0.7082721590995789, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -20.640296936035156, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -13.891681671142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5265297293663025, + "rewards_train/margins": 0.575138509273529, + "rewards_train/rejected": -1.1016682386398315, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -111.19274139404297, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -141.48953247070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.269274115562439, + "rewards_train/margins": 6.2796794176101685, + "rewards_train/rejected": -7.548953533172607, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -63.89768981933594, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -74.8846664428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3102310299873352, + "rewards_train/margins": 0.5486976802349091, + "rewards_train/rejected": -0.23846665024757385, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -21.664073944091797, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -27.74872398376465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7507823705673218, + "rewards_train/margins": 0.3584650754928589, + "rewards_train/rejected": -2.1092474460601807, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -172.7447509765625, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -155.2720947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1744751930236816, + "rewards_train/margins": 2.4027342796325684, + "rewards_train/rejected": -4.57720947265625, + "step": 1554 + }, + { + "epoch": 0.43, + "logps_train/chosen": -40.9692497253418, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -70.90592193603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7656750679016113, + "rewards_train/margins": 1.5874171257019043, + "rewards_train/rejected": -4.353092193603516, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -150.32559204101562, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -155.27615356445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5325592160224915, + "rewards_train/margins": 1.195056140422821, + "rewards_train/rejected": -1.7276153564453125, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -55.14044189453125, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -111.95105743408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1390442848205566, + "rewards_train/margins": 3.756061553955078, + "rewards_train/rejected": -5.895105838775635, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -1.0121726989746094, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -12.736598014831543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06518898159265518, + "rewards_train/margins": 0.4700988009572029, + "rewards_train/rejected": -0.40490981936454773, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -97.42384338378906, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -88.73624420166016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.89238440990448, + "rewards_train/margins": -0.8187599182128906, + "rewards_train/rejected": -1.0736244916915894, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -72.54861450195312, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -86.52926635742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0298614501953125, + "rewards_train/margins": 1.0230653285980225, + "rewards_train/rejected": -3.052926778793335, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -50.476234436035156, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -56.257877349853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4726234674453735, + "rewards_train/margins": 1.7031642198562622, + "rewards_train/rejected": -3.1757876873016357, + "step": 1555 + }, + { + "epoch": 0.43, + "logps_train/chosen": -20.26360321044922, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -23.969928741455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9888603091239929, + "rewards_train/margins": 0.7987576127052307, + "rewards_train/rejected": -1.7876179218292236, + "step": 1555 + }, + { + "epoch": 0.43, + "learning_rate": 6.834446098105054e-07, + "loss": 0.3511, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -2.381871223449707, + "logps_train/ref_chosen": -0.81640625, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -10.900789260864258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15654650330543518, + "rewards_train/margins": 0.6163449585437775, + "rewards_train/rejected": -0.7728914618492126, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -21.532039642333984, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -31.606246948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.296796053647995, + "rewards_train/margins": 1.4949208199977875, + "rewards_train/rejected": -1.1981247663497925, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -26.538177490234375, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -63.98835754394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5788177251815796, + "rewards_train/margins": 1.8950179815292358, + "rewards_train/rejected": -3.4738357067108154, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -44.127098083496094, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -39.489585876464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0627098083496094, + "rewards_train/margins": 0.2987487316131592, + "rewards_train/rejected": -3.3614585399627686, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -67.38499450683594, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -83.32305145263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.563499450683594, + "rewards_train/margins": 1.331305980682373, + "rewards_train/rejected": -5.894805431365967, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -38.592411041259766, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -39.51155090332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23424111306667328, + "rewards_train/margins": 0.09191398322582245, + "rewards_train/rejected": -0.3261550962924957, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -96.79977416992188, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -183.72015380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6299774646759033, + "rewards_train/margins": 4.542038202285767, + "rewards_train/rejected": -6.17201566696167, + "step": 1556 + }, + { + "epoch": 0.43, + "logps_train/chosen": -19.408023834228516, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -25.954147338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6173648834228516, + "rewards_train/margins": 0.5186748504638672, + "rewards_train/rejected": -2.1360397338867188, + "step": 1556 + }, + { + "epoch": 0.44, + "logps_train/chosen": -17.5633602142334, + "logps_train/ref_chosen": -1.8515625, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -12.857734680175781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.571179747581482, + "rewards_train/margins": -0.5088437795639038, + "rewards_train/rejected": -1.0623359680175781, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -105.17452239990234, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -184.87774658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.567452311515808, + "rewards_train/margins": 2.3203223943710327, + "rewards_train/rejected": -3.887774705886841, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -87.17699432373047, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -78.37056732177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0676994323730469, + "rewards_train/margins": 1.3693573474884033, + "rewards_train/rejected": -2.43705677986145, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -118.50098419189453, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -181.25860595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.250098466873169, + "rewards_train/margins": 3.675762414932251, + "rewards_train/rejected": -6.92586088180542, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -81.90792846679688, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -105.20657348632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3407928943634033, + "rewards_train/margins": 0.6298644542694092, + "rewards_train/rejected": -1.9706573486328125, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -35.472984313964844, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -57.26380920410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2426109313964844, + "rewards_train/margins": 0.5962700843811035, + "rewards_train/rejected": -3.838881015777588, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -19.055727005004883, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -31.253597259521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3993227183818817, + "rewards_train/margins": 0.8510370552539825, + "rewards_train/rejected": -1.2503597736358643, + "step": 1557 + }, + { + "epoch": 0.44, + "logps_train/chosen": -74.36863708496094, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -125.35806274414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4368637204170227, + "rewards_train/margins": 1.2989426255226135, + "rewards_train/rejected": -1.7358063459396362, + "step": 1557 + }, + { + "epoch": 0.44, + "learning_rate": 6.809362188759635e-07, + "loss": 0.3423, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -29.003725051879883, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -32.827301025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1253725290298462, + "rewards_train/margins": 1.7573577165603638, + "rewards_train/rejected": -2.88273024559021, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -110.20093536376953, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -115.85059356689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.520093560218811, + "rewards_train/margins": 0.7149658203125, + "rewards_train/rejected": -1.235059380531311, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -18.17274284362793, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -24.79441261291504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3860242962837219, + "rewards_train/margins": 1.087166965007782, + "rewards_train/rejected": -1.473191261291504, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -128.5084228515625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -142.4239959716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.30084228515625, + "rewards_train/margins": 4.94155740737915, + "rewards_train/rejected": -6.2423996925354, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -11.970107078552246, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -49.77908706665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07826071232557297, + "rewards_train/margins": 3.580897994339466, + "rewards_train/rejected": -3.659158706665039, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -93.90087890625, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -161.1407470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19008789956569672, + "rewards_train/margins": 6.773986712098122, + "rewards_train/rejected": -6.964074611663818, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -161.3413543701172, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -199.6356964111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3341355323791504, + "rewards_train/margins": 0.42943406105041504, + "rewards_train/rejected": -2.7635695934295654, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -2.4124209880828857, + "logps_train/ref_chosen": -1.03125, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -16.962955474853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13811710476875305, + "rewards_train/margins": 0.8050534427165985, + "rewards_train/rejected": -0.9431705474853516, + "step": 1558 + }, + { + "epoch": 0.44, + "logps_train/chosen": -144.35809326171875, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -161.69485473632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.735809326171875, + "rewards_train/margins": 2.4836764335632324, + "rewards_train/rejected": -4.219485759735107, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -176.07708740234375, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -232.82215881347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.607708692550659, + "rewards_train/margins": 3.874507188796997, + "rewards_train/rejected": -7.482215881347656, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -12.103740692138672, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -12.469367980957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6416240930557251, + "rewards_train/margins": -0.300937294960022, + "rewards_train/rejected": -0.3406867980957031, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -81.75898742675781, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -17.871150970458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3258987367153168, + "rewards_train/margins": 0.5299663841724396, + "rewards_train/rejected": -0.8558651208877563, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.3711442947387695, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -1.9140625, + "logps_train/rejected": -5.817472457885742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4293019473552704, + "rewards_train/margins": -0.038960933685302734, + "rewards_train/rejected": -0.39034101366996765, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -38.61354064941406, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -58.53407287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4801042079925537, + "rewards_train/margins": 1.8983032703399658, + "rewards_train/rejected": -4.3784074783325195, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -24.197507858276367, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -32.34627151489258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8853758573532104, + "rewards_train/margins": 0.9836262464523315, + "rewards_train/rejected": -2.869002103805542, + "step": 1559 + }, + { + "epoch": 0.44, + "logps_train/chosen": -45.491004943847656, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -39.28676986694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3741005063056946, + "rewards_train/margins": 0.12957650423049927, + "rewards_train/rejected": -0.5036770105361938, + "step": 1559 + }, + { + "epoch": 0.44, + "learning_rate": 6.784300610496047e-07, + "loss": 0.3113, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -92.35444641113281, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -170.871337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6145553588867188, + "rewards_train/margins": 5.351689338684082, + "rewards_train/rejected": -4.737133979797363, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -20.39780044555664, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -14.50558090209961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3647800385951996, + "rewards_train/margins": 0.7295280992984772, + "rewards_train/rejected": -1.0943081378936768, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -197.08224487304688, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -203.8258056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.408224493265152, + "rewards_train/margins": 3.174356073141098, + "rewards_train/rejected": -3.58258056640625, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -196.28379821777344, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -255.68771362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1283798217773438, + "rewards_train/margins": 4.240391731262207, + "rewards_train/rejected": -7.368771553039551, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -120.22506713867188, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -171.5795135498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5225067138671875, + "rewards_train/margins": 3.6854448318481445, + "rewards_train/rejected": -5.207951545715332, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -244.00811767578125, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -175.56927490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.100811958312988, + "rewards_train/margins": 2.8061156272888184, + "rewards_train/rejected": -7.906927585601807, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -68.87089538574219, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -37.25714874267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4870895147323608, + "rewards_train/margins": 1.8745628595352173, + "rewards_train/rejected": -3.361652374267578, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -127.66604614257812, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -176.7764892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5666046142578125, + "rewards_train/margins": 2.011044502258301, + "rewards_train/rejected": -4.577649116516113, + "step": 1560 + }, + { + "epoch": 0.44, + "logps_train/chosen": -21.1167049407959, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -33.01237106323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7960455417633057, + "rewards_train/margins": 1.0770666599273682, + "rewards_train/rejected": -2.873112201690674, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -101.33595275878906, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -1.515625, + "logps_train/rejected": -22.695554733276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6835952997207642, + "rewards_train/margins": 0.43439781665802, + "rewards_train/rejected": -2.117993116378784, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -91.14041900634766, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -220.52828979492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0140419006347656, + "rewards_train/margins": 5.7387871742248535, + "rewards_train/rejected": -6.752829074859619, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -109.88084411621094, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -194.2784423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6880844235420227, + "rewards_train/margins": 5.539759814739227, + "rewards_train/rejected": -6.22784423828125, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -134.97796630859375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -129.902099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.802203357219696, + "rewards_train/margins": 0.892413318157196, + "rewards_train/rejected": -0.0902099609375, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -31.69992446899414, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -42.77070999145508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5012423992156982, + "rewards_train/margins": 1.0289535522460938, + "rewards_train/rejected": -3.530195951461792, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -13.895343780517578, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -29.37643814086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8614093661308289, + "rewards_train/margins": 1.3231094479560852, + "rewards_train/rejected": -2.184518814086914, + "step": 1561 + }, + { + "epoch": 0.44, + "logps_train/chosen": -108.53437805175781, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -112.99209594726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09656219929456711, + "rewards_train/margins": 2.3457718417048454, + "rewards_train/rejected": -2.2492096424102783, + "step": 1561 + }, + { + "epoch": 0.44, + "learning_rate": 6.759261538718768e-07, + "loss": 0.1614, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -112.763916015625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -113.79470825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5263917446136475, + "rewards_train/margins": 1.2030792236328125, + "rewards_train/rejected": -3.72947096824646, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -111.35417938232422, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -97.6599349975586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7354179620742798, + "rewards_train/margins": 1.0805755853652954, + "rewards_train/rejected": -2.815993547439575, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -44.14115905761719, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -85.37802124023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6016159057617188, + "rewards_train/margins": -0.11381363868713379, + "rewards_train/rejected": -2.487802267074585, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -158.4539794921875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -238.49366760253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.695397853851318, + "rewards_train/margins": 8.153969287872314, + "rewards_train/rejected": -12.849367141723633, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -15.31437873840332, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -40.92548370361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.687687873840332, + "rewards_train/margins": 2.764235496520996, + "rewards_train/rejected": -3.451923370361328, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -23.554262161254883, + "logps_train/ref_chosen": -1.78125, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -35.841278076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1773011684417725, + "rewards_train/margins": 1.0474517345428467, + "rewards_train/rejected": -3.224752902984619, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -80.41625213623047, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -53.024627685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5916252136230469, + "rewards_train/margins": 1.860837697982788, + "rewards_train/rejected": -2.452462911605835, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -110.38188171386719, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -108.69696044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4881881773471832, + "rewards_train/margins": 0.4815078675746918, + "rewards_train/rejected": -0.969696044921875, + "step": 1562 + }, + { + "epoch": 0.44, + "logps_train/chosen": -73.59550476074219, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -35.54594421386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5595505237579346, + "rewards_train/margins": 0.7559814453125, + "rewards_train/rejected": -3.3155319690704346, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -87.11180114746094, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -97.13015747070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.111180067062378, + "rewards_train/margins": 1.4018356800079346, + "rewards_train/rejected": -3.5130157470703125, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -15.467822074890137, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -20.962249755859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4155322015285492, + "rewards_train/margins": -0.21930722892284393, + "rewards_train/rejected": -0.19622497260570526, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.166810989379883, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -93.23811340332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3010561168193817, + "rewards_train/margins": 3.3977551758289337, + "rewards_train/rejected": -3.6988112926483154, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -135.6153564453125, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -189.90066528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1115357875823975, + "rewards_train/margins": 5.278530836105347, + "rewards_train/rejected": -7.390066623687744, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -12.384641647338867, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -23.362823486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6165891885757446, + "rewards_train/margins": 0.8446931838989258, + "rewards_train/rejected": -1.4612823724746704, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -43.20142364501953, + "logps_train/ref_chosen": -30.625, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -98.9892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.257642388343811, + "rewards_train/margins": 2.141283392906189, + "rewards_train/rejected": -3.39892578125, + "step": 1563 + }, + { + "epoch": 0.44, + "logps_train/chosen": -92.31686401367188, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -138.5988006591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.081686496734619, + "rewards_train/margins": 3.5781936645507812, + "rewards_train/rejected": -5.6598801612854, + "step": 1563 + }, + { + "epoch": 0.44, + "learning_rate": 6.734245148674755e-07, + "loss": 0.2658, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -72.64845275878906, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -134.22727966308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4648452699184418, + "rewards_train/margins": 3.6078828871250153, + "rewards_train/rejected": -4.072728157043457, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -152.8856658935547, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -136.41796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.288566589355469, + "rewards_train/margins": -0.19676971435546875, + "rewards_train/rejected": -4.091796875, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -54.48048400878906, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -52.496273040771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0230484008789062, + "rewards_train/margins": 0.26407885551452637, + "rewards_train/rejected": -2.2871272563934326, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -25.229272842407227, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -31.69795799255371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1291773319244385, + "rewards_train/margins": 0.43749356269836426, + "rewards_train/rejected": -2.5666708946228027, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -232.30938720703125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -269.9910583496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.030939102172852, + "rewards_train/margins": 0.5681667327880859, + "rewards_train/rejected": -10.599105834960938, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -79.14859008789062, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -59.69810485839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6648589968681335, + "rewards_train/margins": 1.2049514651298523, + "rewards_train/rejected": -1.8698104619979858, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -188.0927734375, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -142.20956420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.209277629852295, + "rewards_train/margins": 0.4116787910461426, + "rewards_train/rejected": -5.6209564208984375, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -75.68426513671875, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -142.92135620117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7684265375137329, + "rewards_train/margins": 6.123709082603455, + "rewards_train/rejected": -6.8921356201171875, + "step": 1564 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.4395928382873535, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -9.9163236618042, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39395928382873535, + "rewards_train/margins": 0.23829811811447144, + "rewards_train/rejected": -0.6322574019432068, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -65.37786865234375, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -100.25216674804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7622131705284119, + "rewards_train/margins": 2.4374298453330994, + "rewards_train/rejected": -1.6752166748046875, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -16.159029006958008, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -31.981090545654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4909029006958008, + "rewards_train/margins": 1.947831153869629, + "rewards_train/rejected": -2.4387340545654297, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -14.780153274536133, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -23.033842086791992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9811403155326843, + "rewards_train/margins": 0.5909939408302307, + "rewards_train/rejected": -1.572134256362915, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -34.993690490722656, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -56.48732376098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6743690967559814, + "rewards_train/margins": 0.011863231658935547, + "rewards_train/rejected": -2.686232328414917, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -232.2417755126953, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -284.4243469238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.024177551269531, + "rewards_train/margins": 6.118257522583008, + "rewards_train/rejected": -13.142435073852539, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -121.5296401977539, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -37.85377502441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8529640436172485, + "rewards_train/margins": 1.9074136018753052, + "rewards_train/rejected": -2.7603776454925537, + "step": 1565 + }, + { + "epoch": 0.44, + "logps_train/chosen": -85.4635009765625, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -85.55943298339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.14635010063648224, + "rewards_train/margins": -0.09040680155158043, + "rewards_train/rejected": -0.05594329908490181, + "step": 1565 + }, + { + "epoch": 0.44, + "learning_rate": 6.709251615452219e-07, + "loss": 0.3697, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -1.9597651958465576, + "logps_train/ref_chosen": -0.357421875, + "logps_train/ref_rejected": -0.357421875, + "logps_train/rejected": -1.9058691263198853, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.16023433208465576, + "rewards_train/margins": -0.005389600992202759, + "rewards_train/rejected": -0.154844731092453, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -18.50634765625, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -24.466794967651367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1225098371505737, + "rewards_train/margins": 0.2929196357727051, + "rewards_train/rejected": -1.4154294729232788, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -113.45155334472656, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -128.81491088867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6951553225517273, + "rewards_train/margins": 1.5863357186317444, + "rewards_train/rejected": -2.2814910411834717, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -153.32150268554688, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -212.2646942138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0321502685546875, + "rewards_train/margins": 5.994319438934326, + "rewards_train/rejected": -7.026469707489014, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -46.188106536865234, + "logps_train/ref_chosen": -31.0, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -37.9007682800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5188106298446655, + "rewards_train/margins": 0.8150161504745483, + "rewards_train/rejected": -2.333826780319214, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -17.463958740234375, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -15.634260177612305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5838958621025085, + "rewards_train/margins": 0.3389051556587219, + "rewards_train/rejected": -0.9228010177612305, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.8252851366996765, + "logps_train/ref_chosen": -0.6875, + "logps_train/ref_rejected": -0.6875, + "logps_train/rejected": -0.8812077641487122, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.013778514228761196, + "rewards_train/margins": 0.005592263303697109, + "rewards_train/rejected": -0.019370777532458305, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -31.45703125, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -32.782501220703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.939453125, + "rewards_train/margins": -0.13620293140411377, + "rewards_train/rejected": -1.8032501935958862, + "step": 1566 + }, + { + "epoch": 0.44, + "logps_train/chosen": -117.4030990600586, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -247.927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.940310001373291, + "rewards_train/margins": 3.0524635314941406, + "rewards_train/rejected": -7.992773532867432, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -47.21015930175781, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -37.82725524902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09601593017578125, + "rewards_train/margins": 0.31170961260795593, + "rewards_train/rejected": -0.4077255427837372, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -11.121458053588867, + "logps_train/ref_chosen": -1.421875, + "logps_train/ref_rejected": -0.46484375, + "logps_train/rejected": -15.248090744018555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9699583053588867, + "rewards_train/margins": 0.5083664655685425, + "rewards_train/rejected": -1.4783247709274292, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -129.008544921875, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -128.95486450195312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.700854480266571, + "rewards_train/margins": -0.00536799430847168, + "rewards_train/rejected": -0.6954864859580994, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -20.40730857849121, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -66.65023040771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3063558340072632, + "rewards_train/margins": 0.983667254447937, + "rewards_train/rejected": -2.2900230884552, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -10.517542839050293, + "logps_train/ref_chosen": -0.74609375, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -14.693619728088379, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9771448969841003, + "rewards_train/margins": -1.0140329264104366, + "rewards_train/rejected": 0.03688802942633629, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -32.458919525146484, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -28.258838653564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8083919882774353, + "rewards_train/margins": 0.8799919486045837, + "rewards_train/rejected": -1.688383936882019, + "step": 1567 + }, + { + "epoch": 0.44, + "logps_train/chosen": -4.183708190917969, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -17.589073181152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.027745818719267845, + "rewards_train/margins": 0.6249115113168955, + "rewards_train/rejected": -0.6526573300361633, + "step": 1567 + }, + { + "epoch": 0.44, + "learning_rate": 6.684281113979382e-07, + "loss": 0.4987, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -223.19924926757812, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -256.46734619140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.969924926757812, + "rewards_train/margins": -1.123189926147461, + "rewards_train/rejected": -11.846735000610352, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -21.281946182250977, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -47.01314926147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7672570943832397, + "rewards_train/margins": 2.1590579748153687, + "rewards_train/rejected": -3.9263150691986084, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.553295135498047, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -20.004257202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1834545135498047, + "rewards_train/margins": 0.5419712066650391, + "rewards_train/rejected": -0.7254257202148438, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -73.47024536132812, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -35.2462158203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.022024631500244, + "rewards_train/margins": -1.209903061389923, + "rewards_train/rejected": -0.812121570110321, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -13.224512100219727, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -21.401132583618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0974512100219727, + "rewards_train/margins": 0.5832871198654175, + "rewards_train/rejected": -1.6807383298873901, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -22.160552978515625, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -2.5, + "logps_train/rejected": -20.44413948059082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6129302978515625, + "rewards_train/margins": 0.18148362636566162, + "rewards_train/rejected": -1.7944139242172241, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -17.396568298339844, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -20.091693878173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6170005798339844, + "rewards_train/margins": 0.05310630798339844, + "rewards_train/rejected": -1.6701068878173828, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -11.201581954956055, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -10.693937301635742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4451582133769989, + "rewards_train/margins": 0.14611050486564636, + "rewards_train/rejected": -0.5912687182426453, + "step": 1568 + }, + { + "epoch": 0.44, + "logps_train/chosen": -18.109237670898438, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -19.583171844482422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3765487670898438, + "rewards_train/margins": -0.16510653495788574, + "rewards_train/rejected": -1.211442232131958, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -29.667036056518555, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -29.862720489501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.507328748703003, + "rewards_train/margins": 0.17113089561462402, + "rewards_train/rejected": -2.678459644317627, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -40.554649353027344, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -1.8828125, + "logps_train/rejected": -2.8440637588500977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7054649591445923, + "rewards_train/margins": -0.6093398332595825, + "rewards_train/rejected": -0.09612512588500977, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -147.24630737304688, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -203.98968505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0746307373046875, + "rewards_train/margins": 3.2243380546569824, + "rewards_train/rejected": -5.29896879196167, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -17.785865783691406, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -38.38240051269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5645240545272827, + "rewards_train/margins": 1.5455909967422485, + "rewards_train/rejected": -3.1101150512695312, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -6.02882194519043, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -6.258483409881592, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3216322064399719, + "rewards_train/margins": -0.22390886396169662, + "rewards_train/rejected": -0.0977233424782753, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -174.65814208984375, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -248.32577514648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.865814208984375, + "rewards_train/margins": 4.6667633056640625, + "rewards_train/rejected": -7.5325775146484375, + "step": 1569 + }, + { + "epoch": 0.44, + "logps_train/chosen": -8.230049133300781, + "logps_train/ref_chosen": -1.015625, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -22.10019302368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7214424014091492, + "rewards_train/margins": 0.6073269248008728, + "rewards_train/rejected": -1.328769326210022, + "step": 1569 + }, + { + "epoch": 0.44, + "learning_rate": 6.65933381902329e-07, + "loss": 0.6065, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -181.9853515625, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -165.31930541992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.298535346984863, + "rewards_train/margins": -1.1666045188903809, + "rewards_train/rejected": -5.131930828094482, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -161.05381774902344, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -206.0, + "logps_train/rejected": -315.94580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2053818702697754, + "rewards_train/margins": 7.789198398590088, + "rewards_train/rejected": -10.994580268859863, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.516516208648682, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -37.785343170166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09227662533521652, + "rewards_train/margins": 2.773757643997669, + "rewards_train/rejected": -2.8660342693328857, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -131.73345947265625, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -148.79129028320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.273345947265625, + "rewards_train/margins": 2.105782985687256, + "rewards_train/rejected": -4.379128932952881, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -220.59521484375, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -275.33197021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.859521389007568, + "rewards_train/margins": 2.9736762046813965, + "rewards_train/rejected": -9.833197593688965, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.573822021484375, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -0.578125, + "logps_train/rejected": -1.5097500085830688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.33550721406936646, + "rewards_train/margins": -0.2423447147011757, + "rewards_train/rejected": -0.09316249936819077, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -165.43008422851562, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -187.01785278320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7430083751678467, + "rewards_train/margins": 4.758776903152466, + "rewards_train/rejected": -8.501785278320312, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -133.08468627929688, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -103.61825561523438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.158468723297119, + "rewards_train/margins": -1.3966431617736816, + "rewards_train/rejected": -3.7618255615234375, + "step": 1570 + }, + { + "epoch": 0.44, + "logps_train/chosen": -172.4270782470703, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -191.74636840820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.042707920074463, + "rewards_train/margins": 1.4319288730621338, + "rewards_train/rejected": -3.4746367931365967, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -121.11681365966797, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -128.71728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.711681365966797, + "rewards_train/margins": 1.4100470542907715, + "rewards_train/rejected": -4.121728420257568, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -259.874755859375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -223.85125732421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.18747615814209, + "rewards_train/margins": -1.6023502349853516, + "rewards_train/rejected": -10.585125923156738, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -40.886077880859375, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -46.048187255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.363607883453369, + "rewards_train/margins": 1.5005857944488525, + "rewards_train/rejected": -3.8641936779022217, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -150.57557678222656, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -210.92123413085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.157557964324951, + "rewards_train/margins": 3.5345654487609863, + "rewards_train/rejected": -7.6921234130859375, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -174.3399658203125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -194.3529815673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9339966773986816, + "rewards_train/margins": 2.2013015747070312, + "rewards_train/rejected": -5.135298252105713, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -9.602338790893555, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -15.623544692993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6696088910102844, + "rewards_train/margins": 0.04899561405181885, + "rewards_train/rejected": -0.7186045050621033, + "step": 1571 + }, + { + "epoch": 0.44, + "logps_train/chosen": -58.159584045410156, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -25.589616775512695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4909584522247314, + "rewards_train/margins": -0.8382467031478882, + "rewards_train/rejected": -1.6527117490768433, + "step": 1571 + }, + { + "epoch": 0.44, + "learning_rate": 6.634409905188553e-07, + "loss": 0.5329, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -91.00968170166016, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -154.82337951660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0009682178497314, + "rewards_train/margins": 2.6313698291778564, + "rewards_train/rejected": -3.632338047027588, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -129.90245056152344, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -210.5769805908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6902450919151306, + "rewards_train/margins": 5.467453062534332, + "rewards_train/rejected": -6.157698154449463, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -91.3837661743164, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -160.81369018554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3383766412734985, + "rewards_train/margins": 3.0429924726486206, + "rewards_train/rejected": -4.381369113922119, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -235.360595703125, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -205.1585693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.3360595703125, + "rewards_train/margins": 1.2797975540161133, + "rewards_train/rejected": -7.615857124328613, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -119.83572387695312, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -123.05145263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.5835723876953125, + "rewards_train/margins": -0.22842693328857422, + "rewards_train/rejected": -4.355145454406738, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -195.58383178710938, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -220.51724243164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.158383369445801, + "rewards_train/margins": 4.443341255187988, + "rewards_train/rejected": -9.601724624633789, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -26.311302185058594, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -71.0820541381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1623802185058594, + "rewards_train/margins": 1.6708252429962158, + "rewards_train/rejected": -2.833205461502075, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -252.40707397460938, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -298.5346984863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.940707206726074, + "rewards_train/margins": 4.612762451171875, + "rewards_train/rejected": -14.55346965789795, + "step": 1572 + }, + { + "epoch": 0.44, + "logps_train/chosen": -198.85247802734375, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -237.129638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.985247850418091, + "rewards_train/margins": 3.827716112136841, + "rewards_train/rejected": -7.812963962554932, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -34.92704772949219, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -28.15648651123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2677048444747925, + "rewards_train/margins": 0.6479438543319702, + "rewards_train/rejected": -1.9156486988067627, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -40.22011184692383, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -25.298391342163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2720111906528473, + "rewards_train/margins": 0.0953279435634613, + "rewards_train/rejected": -0.3673391342163086, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -118.86161804199219, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -155.5875244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5861617922782898, + "rewards_train/margins": 0.9725906252861023, + "rewards_train/rejected": -1.558752417564392, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.9781352877616882, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -8.000006675720215, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17249897122383118, + "rewards_train/margins": 0.2506246417760849, + "rewards_train/rejected": -0.07812567055225372, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -47.276405334472656, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -38.319000244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5026405453681946, + "rewards_train/margins": 2.2855095267295837, + "rewards_train/rejected": -2.7881500720977783, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -110.52017974853516, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -108.37458801269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.102018117904663, + "rewards_train/margins": 2.3854405879974365, + "rewards_train/rejected": -5.4874587059021, + "step": 1573 + }, + { + "epoch": 0.44, + "logps_train/chosen": -68.65200805664062, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -113.22767639160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0902007818222046, + "rewards_train/margins": 1.3325668573379517, + "rewards_train/rejected": -2.4227676391601562, + "step": 1573 + }, + { + "epoch": 0.44, + "learning_rate": 6.609509546916145e-07, + "loss": 0.236, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -1.9102133512496948, + "logps_train/ref_chosen": -0.84765625, + "logps_train/ref_rejected": -0.84765625, + "logps_train/rejected": -1.909165382385254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10625571012496948, + "rewards_train/margins": -0.00010479241609573364, + "rewards_train/rejected": -0.10615091770887375, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -175.21218872070312, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -223.85391235351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.321218967437744, + "rewards_train/margins": 5.314172267913818, + "rewards_train/rejected": -9.635391235351562, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -101.85365295410156, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -178.63937377929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.38536536693573, + "rewards_train/margins": 5.9785720109939575, + "rewards_train/rejected": -7.3639373779296875, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -63.18122482299805, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -51.189613342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26812249422073364, + "rewards_train/margins": 3.338338792324066, + "rewards_train/rejected": -3.6064612865448, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -18.646678924560547, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -19.26929473876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9084178805351257, + "rewards_train/margins": 0.10601156949996948, + "rewards_train/rejected": -1.0144294500350952, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -6.693903923034668, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -37.36629867553711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16939039528369904, + "rewards_train/margins": 1.729739472270012, + "rewards_train/rejected": -1.899129867553711, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -145.2497100830078, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -178.94677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7249709963798523, + "rewards_train/margins": 3.4697068333625793, + "rewards_train/rejected": -4.194677829742432, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -21.010604858398438, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -5.9514851570129395, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2698105573654175, + "rewards_train/margins": -0.9324745237827301, + "rewards_train/rejected": -0.3373360335826874, + "step": 1574 + }, + { + "epoch": 0.44, + "logps_train/chosen": -42.42279052734375, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -37.118560791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9797791242599487, + "rewards_train/margins": 0.19457709789276123, + "rewards_train/rejected": -2.17435622215271, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -86.59757995605469, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -87.14688110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8597580194473267, + "rewards_train/margins": 0.05493009090423584, + "rewards_train/rejected": -0.9146881103515625, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -8.660311698913574, + "logps_train/ref_chosen": -0.83203125, + "logps_train/ref_rejected": -0.83203125, + "logps_train/rejected": -9.088929176330566, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7828280329704285, + "rewards_train/margins": 0.042861759662628174, + "rewards_train/rejected": -0.8256897926330566, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.555993556976318, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -25.437349319458008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6220056414604187, + "rewards_train/margins": 1.667041838169098, + "rewards_train/rejected": -2.2890474796295166, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -12.136251449584961, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -16.263168334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03862514719367027, + "rewards_train/margins": 1.0658167339861393, + "rewards_train/rejected": -1.1044418811798096, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -82.32528686523438, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -95.72247314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6325286626815796, + "rewards_train/margins": 0.5897186994552612, + "rewards_train/rejected": -2.222247362136841, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -13.225076675415039, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -20.9897518157959, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9193826913833618, + "rewards_train/margins": -0.08290749788284302, + "rewards_train/rejected": -0.8364751935005188, + "step": 1575 + }, + { + "epoch": 0.44, + "logps_train/chosen": -111.2871322631836, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -99.22146606445312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4287132024765015, + "rewards_train/margins": -0.45656657218933105, + "rewards_train/rejected": -0.9721466302871704, + "step": 1575 + }, + { + "epoch": 0.44, + "learning_rate": 6.58463291848217e-07, + "loss": 0.4604, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -182.6042022705078, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -209.19004821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.560420513153076, + "rewards_train/margins": 0.5585842132568359, + "rewards_train/rejected": -6.119004726409912, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -58.40277099609375, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -39.678443908691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.115277051925659, + "rewards_train/margins": -0.47243261337280273, + "rewards_train/rejected": -1.6428444385528564, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.33055299520492554, + "logps_train/ref_chosen": -1.3671875, + "logps_train/ref_rejected": -1.3203125, + "logps_train/rejected": -11.485661506652832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10366344451904297, + "rewards_train/margins": 1.120198369026184, + "rewards_train/rejected": -1.0165349245071411, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -52.01069259643555, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -51.90963363647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7010693550109863, + "rewards_train/margins": 0.5961441993713379, + "rewards_train/rejected": -4.297213554382324, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -174.13265991210938, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -162.717041015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.063266277313232, + "rewards_train/margins": -2.791562080383301, + "rewards_train/rejected": -3.2717041969299316, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -135.59051513671875, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -217.43344116210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.159051418304443, + "rewards_train/margins": 4.784292697906494, + "rewards_train/rejected": -8.943344116210938, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -41.19123077392578, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -65.31979370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0691230297088623, + "rewards_train/margins": 0.5128564834594727, + "rewards_train/rejected": -2.581979513168335, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -41.53562927246094, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -48.18119812011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05356292799115181, + "rewards_train/margins": 0.6395568959414959, + "rewards_train/rejected": -0.6931198239326477, + "step": 1576 + }, + { + "epoch": 0.44, + "logps_train/chosen": -14.066387176513672, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -71.43621826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7753887176513672, + "rewards_train/margins": 4.580733299255371, + "rewards_train/rejected": -5.356122016906738, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -100.71589660644531, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -121.97785949707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2215898036956787, + "rewards_train/margins": 2.276196241378784, + "rewards_train/rejected": -4.497786045074463, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -4.246707916259766, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -18.39825439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0481082908809185, + "rewards_train/margins": 0.19171715155243874, + "rewards_train/rejected": -0.23982544243335724, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -30.133636474609375, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -82.77842712402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9883636236190796, + "rewards_train/margins": 1.4894791841506958, + "rewards_train/rejected": -3.4778428077697754, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -29.026769638061523, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -9.389668464660645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7464269399642944, + "rewards_train/margins": -1.31371009349823, + "rewards_train/rejected": -0.43271684646606445, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -113.58454895019531, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -116.7166519165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4084548950195312, + "rewards_train/margins": 0.16321039199829102, + "rewards_train/rejected": -2.5716652870178223, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -170.8392333984375, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -172.619873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.633923530578613, + "rewards_train/margins": 0.17806386947631836, + "rewards_train/rejected": -6.811987400054932, + "step": 1577 + }, + { + "epoch": 0.44, + "logps_train/chosen": -19.586013793945312, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -27.38965606689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4961013793945312, + "rewards_train/margins": 0.0053642988204956055, + "rewards_train/rejected": -1.5014656782150269, + "step": 1577 + }, + { + "epoch": 0.44, + "learning_rate": 6.559780193996655e-07, + "loss": 0.6414, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -84.30378723144531, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -174.0862579345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8303787112236023, + "rewards_train/margins": 7.028247177600861, + "rewards_train/rejected": -7.858625888824463, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -111.41871643066406, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -80.82595825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.091871738433838, + "rewards_train/margins": 0.7907240390777588, + "rewards_train/rejected": -3.8825957775115967, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -140.73037719726562, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -157.51528930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.623037815093994, + "rewards_train/margins": 5.678491115570068, + "rewards_train/rejected": -8.301528930664062, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -153.5134735107422, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -155.30137634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.35134744644165, + "rewards_train/margins": 0.17879009246826172, + "rewards_train/rejected": -5.530137538909912, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -1.2458086013793945, + "logps_train/ref_chosen": -1.1328125, + "logps_train/ref_rejected": -1.234375, + "logps_train/rejected": -1.2646141052246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.011299610137939453, + "rewards_train/margins": -0.008275699568912387, + "rewards_train/rejected": -0.0030239105690270662, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -105.5278091430664, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -238.27920532226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4027810096740723, + "rewards_train/margins": 10.375139713287354, + "rewards_train/rejected": -13.777920722961426, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.6935553550720215, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -4.308378219604492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07205071300268173, + "rewards_train/margins": 0.21695103496313095, + "rewards_train/rejected": -0.14490032196044922, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -37.168922424316406, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -111.3357925415039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.066892385482788, + "rewards_train/margins": 1.8166868686676025, + "rewards_train/rejected": -3.8835792541503906, + "step": 1578 + }, + { + "epoch": 0.44, + "logps_train/chosen": -5.792271614074707, + "logps_train/ref_chosen": -1.1484375, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -12.95431900024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46438342332839966, + "rewards_train/margins": 0.40917348861694336, + "rewards_train/rejected": -0.873556911945343, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -27.572994232177734, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -25.541236877441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8572994470596313, + "rewards_train/margins": 1.4280742406845093, + "rewards_train/rejected": -2.2853736877441406, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -180.85614013671875, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -210.57757568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.085614204406738, + "rewards_train/margins": 2.4721436500549316, + "rewards_train/rejected": -7.55775785446167, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -146.9270782470703, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -204.02120971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.942707896232605, + "rewards_train/margins": 5.459413170814514, + "rewards_train/rejected": -7.402121067047119, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -22.582029342651367, + "logps_train/ref_chosen": -6.46875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -14.943764686584473, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6113280057907104, + "rewards_train/margins": -0.7857015132904053, + "rewards_train/rejected": -0.8256264925003052, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -11.246431350708008, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -25.23359489440918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7840181589126587, + "rewards_train/margins": 1.0612163543701172, + "rewards_train/rejected": -1.8452345132827759, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -103.30497741699219, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -157.14413452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.130497694015503, + "rewards_train/margins": 3.133915662765503, + "rewards_train/rejected": -5.264413356781006, + "step": 1579 + }, + { + "epoch": 0.44, + "logps_train/chosen": -221.09609985351562, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -240.2668914794922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.109610557556152, + "rewards_train/margins": -1.1829214096069336, + "rewards_train/rejected": -9.926689147949219, + "step": 1579 + }, + { + "epoch": 0.44, + "learning_rate": 6.534951547402321e-07, + "loss": 0.3866, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -238.7981719970703, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -215.61764526367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.379817008972168, + "rewards_train/margins": -0.21805191040039062, + "rewards_train/rejected": -8.161765098571777, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -16.88833999633789, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -9.583175659179688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0325840711593628, + "rewards_train/margins": -0.361766517162323, + "rewards_train/rejected": -0.6708175539970398, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -73.31305694580078, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -32.97697448730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06869430840015411, + "rewards_train/margins": 1.553891733288765, + "rewards_train/rejected": -1.4851974248886108, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -69.20117950439453, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -120.17994689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.670117974281311, + "rewards_train/margins": 1.6978768110275269, + "rewards_train/rejected": -2.367994785308838, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -148.42832946777344, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -213.34671020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9428329467773438, + "rewards_train/margins": 2.991837978363037, + "rewards_train/rejected": -4.934670925140381, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -21.41039276123047, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -27.173229217529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7285392880439758, + "rewards_train/margins": 0.976283609867096, + "rewards_train/rejected": -1.7048228979110718, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -248.7406768798828, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -230.31076049804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.874068260192871, + "rewards_train/margins": 0.657008171081543, + "rewards_train/rejected": -10.531076431274414, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -113.34330749511719, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -180.28213500976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4843307733535767, + "rewards_train/margins": 3.2438830137252808, + "rewards_train/rejected": -4.728213787078857, + "step": 1580 + }, + { + "epoch": 0.44, + "logps_train/chosen": -25.533018112182617, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -39.70392608642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8033018112182617, + "rewards_train/margins": -0.6454092264175415, + "rewards_train/rejected": -1.1578925848007202, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -41.37976837158203, + "logps_train/ref_chosen": -6.90625, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -45.42469787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4473519325256348, + "rewards_train/margins": 0.4138679504394531, + "rewards_train/rejected": -3.861219882965088, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -241.25656127929688, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -182.62213134765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.425656318664551, + "rewards_train/margins": -1.3634428977966309, + "rewards_train/rejected": -6.06221342086792, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -19.755300521850586, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -42.0379524230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1442800760269165, + "rewards_train/margins": 1.7595151662826538, + "rewards_train/rejected": -2.9037952423095703, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -15.133249282836914, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -15.129464149475098, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11332493275403976, + "rewards_train/margins": 0.9058714583516121, + "rewards_train/rejected": -1.0191963911056519, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -55.08514404296875, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -53.53858184814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.658514380455017, + "rewards_train/margins": 1.807843804359436, + "rewards_train/rejected": -3.466358184814453, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -172.4933319091797, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -160.71340942382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.899333477020264, + "rewards_train/margins": -0.07799243927001953, + "rewards_train/rejected": -4.821341037750244, + "step": 1581 + }, + { + "epoch": 0.44, + "logps_train/chosen": -151.21371459960938, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -152.5323944091797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9213714599609375, + "rewards_train/margins": -0.1681320071220398, + "rewards_train/rejected": -0.7532394528388977, + "step": 1581 + }, + { + "epoch": 0.44, + "learning_rate": 6.51014715247337e-07, + "loss": 0.5131, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -145.57745361328125, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -161.27720642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.207745313644409, + "rewards_train/margins": 3.019975423812866, + "rewards_train/rejected": -6.227720737457275, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -132.04129028320312, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -143.23043823242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3541290760040283, + "rewards_train/margins": 2.0689146518707275, + "rewards_train/rejected": -4.423043727874756, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.8739118576049805, + "logps_train/ref_chosen": -1.28125, + "logps_train/ref_rejected": -2.59375, + "logps_train/rejected": -2.451838254928589, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04073381423950195, + "rewards_train/margins": 0.02654263935983181, + "rewards_train/rejected": 0.014191174879670143, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -122.5566177368164, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -129.36326599121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.255661725997925, + "rewards_train/margins": 1.9306647777557373, + "rewards_train/rejected": -4.186326503753662, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -6.48544979095459, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -13.85477066040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09520502388477325, + "rewards_train/margins": 0.05568208917975426, + "rewards_train/rejected": 0.039522934705019, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -98.68421936035156, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -89.03549194335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3684219121932983, + "rewards_train/margins": -0.4148727059364319, + "rewards_train/rejected": -0.9535492062568665, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -229.04156494140625, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -214.43453979492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.904156684875488, + "rewards_train/margins": 1.3392972946166992, + "rewards_train/rejected": -6.2434539794921875, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -257.5321044921875, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -251.05145263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.60321044921875, + "rewards_train/margins": -0.7480649948120117, + "rewards_train/rejected": -12.855145454406738, + "step": 1582 + }, + { + "epoch": 0.44, + "logps_train/chosen": -51.32974624633789, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -59.17308044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8329746127128601, + "rewards_train/margins": 0.7593334317207336, + "rewards_train/rejected": -1.5923080444335938, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -105.72167205810547, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -123.43389892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2221672534942627, + "rewards_train/margins": 1.1712226867675781, + "rewards_train/rejected": -3.393389940261841, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -209.87222290039062, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -228.82957458496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.787222385406494, + "rewards_train/margins": 3.7957353591918945, + "rewards_train/rejected": -6.582957744598389, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -2.607846260070801, + "logps_train/ref_chosen": -2.5625, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -7.7305097579956055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004534625913947821, + "rewards_train/margins": 0.1810163469053805, + "rewards_train/rejected": -0.1855509728193283, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -37.71173858642578, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -47.60856628417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1461738348007202, + "rewards_train/margins": 1.5146828889846802, + "rewards_train/rejected": -2.6608567237854004, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -125.77972412109375, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -85.02719116210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.227972507476807, + "rewards_train/margins": 0.4247465133666992, + "rewards_train/rejected": -4.652719020843506, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -74.98464965820312, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -171.13589477539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5484649538993835, + "rewards_train/margins": 5.515124619007111, + "rewards_train/rejected": -6.063589572906494, + "step": 1583 + }, + { + "epoch": 0.44, + "logps_train/chosen": -11.9056396484375, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -0.451171875, + "logps_train/rejected": -2.1585938930511475, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4436889588832855, + "rewards_train/margins": -0.272946760058403, + "rewards_train/rejected": -0.1707421988248825, + "step": 1583 + }, + { + "epoch": 0.44, + "learning_rate": 6.485367182814263e-07, + "loss": 0.4228, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -118.32763671875, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -154.2645263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.082763671875, + "rewards_train/margins": 2.7436890602111816, + "rewards_train/rejected": -4.826452732086182, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -148.99349975585938, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -218.7434844970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.999350070953369, + "rewards_train/margins": 3.4749984741210938, + "rewards_train/rejected": -6.474348545074463, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -151.43844604492188, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -210.490478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.293844699859619, + "rewards_train/margins": 1.9552030563354492, + "rewards_train/rejected": -7.249047756195068, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -13.64127254486084, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -46.625877380371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47037726640701294, + "rewards_train/margins": 2.8109604716300964, + "rewards_train/rejected": -3.2813377380371094, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -70.47442626953125, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -135.34996032714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10255737602710724, + "rewards_train/margins": 3.187553361058235, + "rewards_train/rejected": -3.084995985031128, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -232.72445678710938, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -278.62060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.0724458694458, + "rewards_train/margins": 3.789614677429199, + "rewards_train/rejected": -15.862060546875, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -196.1415252685547, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -204.384521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.764152526855469, + "rewards_train/margins": 2.0242996215820312, + "rewards_train/rejected": -9.7884521484375, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -117.01557159423828, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -103.3325424194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.651557445526123, + "rewards_train/margins": 0.15669679641723633, + "rewards_train/rejected": -4.808254241943359, + "step": 1584 + }, + { + "epoch": 0.44, + "logps_train/chosen": -27.24995994567871, + "logps_train/ref_chosen": -1.359375, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -59.10383605957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5890586376190186, + "rewards_train/margins": -0.35367488861083984, + "rewards_train/rejected": -2.2353837490081787, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -150.78158569335938, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -226.65737915039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.778158664703369, + "rewards_train/margins": 7.337579250335693, + "rewards_train/rejected": -10.115737915039062, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -30.104795455932617, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -50.38360595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.244854688644409, + "rewards_train/margins": 0.6060059070587158, + "rewards_train/rejected": -2.850860595703125, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -57.590179443359375, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -32.506412506103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.109018087387085, + "rewards_train/margins": 0.39162325859069824, + "rewards_train/rejected": -2.500641345977783, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.8560121059417725, + "logps_train/ref_chosen": -0.65234375, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -9.693253517150879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020366836339235306, + "rewards_train/margins": 0.6911460272967815, + "rewards_train/rejected": -0.7115128636360168, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -31.880577087402344, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -33.63610076904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7505577206611633, + "rewards_train/margins": 1.7505523562431335, + "rewards_train/rejected": -2.501110076904297, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -12.172719955444336, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -23.998943328857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7578970193862915, + "rewards_train/margins": 0.3669973611831665, + "rewards_train/rejected": -1.124894380569458, + "step": 1585 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.002522945404053, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -11.745153427124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03462729603052139, + "rewards_train/margins": 0.00238804891705513, + "rewards_train/rejected": -0.03701534494757652, + "step": 1585 + }, + { + "epoch": 0.44, + "learning_rate": 6.460611811858521e-07, + "loss": 0.2948, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -277.4166259765625, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -226.75962829589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.64166259765625, + "rewards_train/margins": -2.4656991958618164, + "rewards_train/rejected": -9.175963401794434, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -67.41571044921875, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -156.61744689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.041571021080017, + "rewards_train/margins": 5.570173859596252, + "rewards_train/rejected": -6.6117448806762695, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -122.68641662597656, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -162.05149841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.768641710281372, + "rewards_train/margins": 0.5365080833435059, + "rewards_train/rejected": -2.305149793624878, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -206.8616943359375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -204.0, + "logps_train/rejected": -279.62762451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.38616943359375, + "rewards_train/margins": 1.17659330368042, + "rewards_train/rejected": -7.56276273727417, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -37.90194320678711, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -49.50676345825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2276943922042847, + "rewards_train/margins": 2.4292320013046265, + "rewards_train/rejected": -3.656926393508911, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -238.43084716796875, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -239.630615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.843085289001465, + "rewards_train/margins": 0.11997604370117188, + "rewards_train/rejected": -9.963061332702637, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -141.0739288330078, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -189.9901123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.457392930984497, + "rewards_train/margins": 3.341618299484253, + "rewards_train/rejected": -5.79901123046875, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -174.23089599609375, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -202.91172790527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6230896711349487, + "rewards_train/margins": 4.56808340549469, + "rewards_train/rejected": -6.191173076629639, + "step": 1586 + }, + { + "epoch": 0.44, + "logps_train/chosen": -95.54246520996094, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -55.86574172973633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8542466163635254, + "rewards_train/margins": -0.3801724910736084, + "rewards_train/rejected": -3.474074125289917, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -32.25202178955078, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -24.343303680419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.537702202796936, + "rewards_train/margins": 1.5450657606124878, + "rewards_train/rejected": -2.082767963409424, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -192.8060760498047, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -295.44110107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.880607604980469, + "rewards_train/margins": 9.413503646850586, + "rewards_train/rejected": -17.294111251831055, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -140.09866333007812, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -238.03916931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6098663806915283, + "rewards_train/margins": 8.044050931930542, + "rewards_train/rejected": -11.65391731262207, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -66.08820343017578, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -80.79810333251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6088203191757202, + "rewards_train/margins": 2.595990300178528, + "rewards_train/rejected": -4.204810619354248, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -150.52838134765625, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -209.18905639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.552838325500488, + "rewards_train/margins": 1.8660674095153809, + "rewards_train/rejected": -7.418905735015869, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -148.20472717285156, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -135.75694274902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.720472812652588, + "rewards_train/margins": 1.8052215576171875, + "rewards_train/rejected": -5.525694370269775, + "step": 1587 + }, + { + "epoch": 0.44, + "logps_train/chosen": -29.858898162841797, + "logps_train/ref_chosen": -1.0390625, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -6.723952770233154, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.881983518600464, + "rewards_train/margins": -2.6908382326364517, + "rewards_train/rejected": -0.19114528596401215, + "step": 1587 + }, + { + "epoch": 0.44, + "learning_rate": 6.435881212867493e-07, + "loss": 0.5161, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -33.77785110473633, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -76.05653381347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4902851581573486, + "rewards_train/margins": 2.1653683185577393, + "rewards_train/rejected": -3.655653476715088, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -142.26171875, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -258.1614685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.326171875, + "rewards_train/margins": 10.589975357055664, + "rewards_train/rejected": -15.916147232055664, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -161.1140594482422, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -244.03164672851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.061406135559082, + "rewards_train/margins": 7.94175910949707, + "rewards_train/rejected": -12.003165245056152, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -16.95252227783203, + "logps_train/ref_chosen": -1.796875, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -22.220415115356445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5155647993087769, + "rewards_train/margins": 0.00022673606872558594, + "rewards_train/rejected": -1.5157915353775024, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -10.22490406036377, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -1.6328125, + "logps_train/rejected": -2.448805809020996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2506154179573059, + "rewards_train/margins": -0.16901608556509018, + "rewards_train/rejected": -0.08159933239221573, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -115.36206817626953, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -222.6712188720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0362067222595215, + "rewards_train/margins": 6.88091516494751, + "rewards_train/rejected": -10.917121887207031, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -24.68239974975586, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -0.37109375, + "logps_train/rejected": -18.063762664794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.386989951133728, + "rewards_train/margins": 0.3822770118713379, + "rewards_train/rejected": -1.769266963005066, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -80.1270980834961, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -107.96084594726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2627098560333252, + "rewards_train/margins": 2.033374786376953, + "rewards_train/rejected": -3.2960846424102783, + "step": 1588 + }, + { + "epoch": 0.44, + "logps_train/chosen": -190.71804809570312, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -192.1485137939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.571805000305176, + "rewards_train/margins": 0.04304647445678711, + "rewards_train/rejected": -5.614851474761963, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -31.881763458251953, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -20.370765686035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7506763935089111, + "rewards_train/margins": -0.030787348747253418, + "rewards_train/rejected": -1.7198890447616577, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -21.949108123779297, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -61.37550735473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1699107885360718, + "rewards_train/margins": 3.473889946937561, + "rewards_train/rejected": -4.643800735473633, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -102.45379638671875, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -148.167724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.695379614830017, + "rewards_train/margins": 3.871392846107483, + "rewards_train/rejected": -5.5667724609375, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -9.728455543518066, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -47.25730895996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6634705662727356, + "rewards_train/margins": 3.1747602820396423, + "rewards_train/rejected": -3.838230848312378, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -108.97676086425781, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -59.3311653137207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5476760864257812, + "rewards_train/margins": 0.13544058799743652, + "rewards_train/rejected": -2.6831166744232178, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -11.020699501037598, + "logps_train/ref_chosen": -1.3515625, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -39.368507385253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9669137001037598, + "rewards_train/margins": 1.7511870861053467, + "rewards_train/rejected": -2.7181007862091064, + "step": 1589 + }, + { + "epoch": 0.44, + "logps_train/chosen": -7.863099098205566, + "logps_train/ref_chosen": -1.8828125, + "logps_train/ref_rejected": -1.9375, + "logps_train/rejected": -5.603956699371338, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5980286598205566, + "rewards_train/margins": -0.23138299584388733, + "rewards_train/rejected": -0.3666456639766693, + "step": 1589 + }, + { + "epoch": 0.44, + "learning_rate": 6.411175558929152e-07, + "loss": 0.3314, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -158.42054748535156, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -196.7296600341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.4920549392700195, + "rewards_train/margins": 3.280911445617676, + "rewards_train/rejected": -8.772966384887695, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -119.5260009765625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -148.2184295654297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20260010659694672, + "rewards_train/margins": -0.2807571515440941, + "rewards_train/rejected": 0.07815704494714737, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -112.899658203125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -121.7835693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03996581956744194, + "rewards_train/margins": 1.838391114026308, + "rewards_train/rejected": -1.87835693359375, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -23.823612213134766, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -77.59303283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1323612928390503, + "rewards_train/margins": 4.239441990852356, + "rewards_train/rejected": -5.371803283691406, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -229.57745361328125, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -252.75318908691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -13.707745552062988, + "rewards_train/margins": 0.4675731658935547, + "rewards_train/rejected": -14.175318717956543, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -0.774803876876831, + "logps_train/ref_chosen": -0.1220703125, + "logps_train/ref_rejected": -0.1220703125, + "logps_train/rejected": -0.6523449420928955, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06527335941791534, + "rewards_train/margins": -0.012245897203683853, + "rewards_train/rejected": -0.05302746221423149, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -17.046201705932617, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -45.477508544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5171201825141907, + "rewards_train/margins": 1.0681306719779968, + "rewards_train/rejected": -1.5852508544921875, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -39.42633056640625, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -80.23832702636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.267633080482483, + "rewards_train/margins": 1.6811996698379517, + "rewards_train/rejected": -2.9488327503204346, + "step": 1590 + }, + { + "epoch": 0.44, + "logps_train/chosen": -43.810447692871094, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -82.12513732910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7560448050498962, + "rewards_train/margins": 0.65646892786026, + "rewards_train/rejected": -1.4125137329101562, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -29.7891845703125, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -49.04753112792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.25079345703125, + "rewards_train/margins": 1.4789597988128662, + "rewards_train/rejected": -3.729753255844116, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -38.56705856323242, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -48.28963088989258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.141080856323242, + "rewards_train/margins": -0.46211767196655273, + "rewards_train/rejected": -2.6789631843566895, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -51.25577163696289, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -29.54642677307129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.113077163696289, + "rewards_train/margins": 0.288440465927124, + "rewards_train/rejected": -2.401517629623413, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -67.81977844238281, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -80.56060791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0819778442382812, + "rewards_train/margins": 1.3240830898284912, + "rewards_train/rejected": -2.4060609340667725, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -149.9550323486328, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -292.8487243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7955033779144287, + "rewards_train/margins": 8.189369440078735, + "rewards_train/rejected": -10.984872817993164, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -157.7306671142578, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -141.87181091308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.023066759109497, + "rewards_train/margins": 2.664114236831665, + "rewards_train/rejected": -5.687180995941162, + "step": 1591 + }, + { + "epoch": 0.44, + "logps_train/chosen": -175.25692749023438, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -242.6750946044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.525692939758301, + "rewards_train/margins": 8.091816902160645, + "rewards_train/rejected": -12.617509841918945, + "step": 1591 + }, + { + "epoch": 0.44, + "learning_rate": 6.386495022956875e-07, + "loss": 0.3207, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -12.680384635925293, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -1.9375, + "logps_train/rejected": -10.337458610534668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42428848147392273, + "rewards_train/margins": 0.41570737957954407, + "rewards_train/rejected": -0.8399958610534668, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -30.167583465576172, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -5.272000789642334, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.673008441925049, + "rewards_train/margins": -2.4145583510398865, + "rewards_train/rejected": -0.25845009088516235, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -28.74647331237793, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -33.553646087646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.812147319316864, + "rewards_train/margins": 1.8182173371315002, + "rewards_train/rejected": -2.6303646564483643, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -34.98848342895508, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -37.823883056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0425984859466553, + "rewards_train/margins": 0.9710397720336914, + "rewards_train/rejected": -3.0136382579803467, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -9.533722877502441, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -21.820470809936523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1091277152299881, + "rewards_train/margins": 1.4661747962236404, + "rewards_train/rejected": -1.3570470809936523, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -120.69955444335938, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -198.88294982910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2199554443359375, + "rewards_train/margins": 6.968339920043945, + "rewards_train/rejected": -9.188295364379883, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -126.595947265625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -163.7323760986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.909594774246216, + "rewards_train/margins": 3.2636430263519287, + "rewards_train/rejected": -7.1732378005981445, + "step": 1592 + }, + { + "epoch": 0.44, + "logps_train/chosen": -53.51627731323242, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -76.33918762207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8016277551651, + "rewards_train/margins": 1.6072911024093628, + "rewards_train/rejected": -3.408918857574463, + "step": 1592 + }, + { + "epoch": 0.45, + "logps_train/chosen": -112.89283752441406, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -142.02957153320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08928375691175461, + "rewards_train/margins": 1.913673348724842, + "rewards_train/rejected": -2.0029571056365967, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -24.456504821777344, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -30.176645278930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.439400553703308, + "rewards_train/margins": 0.5095139741897583, + "rewards_train/rejected": -1.9489145278930664, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -120.20333862304688, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -146.1029815673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.320333957672119, + "rewards_train/margins": 3.3899641036987305, + "rewards_train/rejected": -5.71029806137085, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -88.82412719726562, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -92.67317962646484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.482412725687027, + "rewards_train/margins": -0.06509476900100708, + "rewards_train/rejected": -0.4173179566860199, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -100.12654876708984, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -229.71917724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9126548767089844, + "rewards_train/margins": 9.159262657165527, + "rewards_train/rejected": -10.071917533874512, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -186.0004119873047, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -171.67576599121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.600041389465332, + "rewards_train/margins": -1.5824646949768066, + "rewards_train/rejected": -7.017576694488525, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -30.440555572509766, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -35.79383087158203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2846806049346924, + "rewards_train/margins": -0.5552974939346313, + "rewards_train/rejected": -1.729383111000061, + "step": 1593 + }, + { + "epoch": 0.45, + "logps_train/chosen": -103.55859375, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -104.39573669433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.055859327316284, + "rewards_train/margins": -0.16628563404083252, + "rewards_train/rejected": -1.8895736932754517, + "step": 1593 + }, + { + "epoch": 0.45, + "learning_rate": 6.361839777688254e-07, + "loss": 0.5519, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -68.1094970703125, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -101.27146911621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.385949730873108, + "rewards_train/margins": 0.7411972284317017, + "rewards_train/rejected": -2.1271469593048096, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -97.5409164428711, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -102.64130401611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6540915966033936, + "rewards_train/margins": 2.335038900375366, + "rewards_train/rejected": -5.98913049697876, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -0.6762292981147766, + "logps_train/ref_chosen": -0.70703125, + "logps_train/ref_rejected": -0.70703125, + "logps_train/rejected": -0.6820605993270874, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0030801952816545963, + "rewards_train/margins": 0.0005831301677972078, + "rewards_train/rejected": 0.0024970651138573885, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -13.734914779663086, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -29.84393882751465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16099147498607635, + "rewards_train/margins": 0.13590241968631744, + "rewards_train/rejected": -0.2968938946723938, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -24.508255004882812, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -30.982654571533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5883255004882812, + "rewards_train/margins": 1.2286899089813232, + "rewards_train/rejected": -2.8170154094696045, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -9.678668022155762, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -42.99372863769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42099180817604065, + "rewards_train/margins": 2.090881198644638, + "rewards_train/rejected": -2.5118730068206787, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -19.042264938354492, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -71.66658020019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5854765176773071, + "rewards_train/margins": 3.1561814546585083, + "rewards_train/rejected": -3.7416579723358154, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -159.79476928710938, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -187.42626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2794768810272217, + "rewards_train/margins": 3.1631500720977783, + "rewards_train/rejected": -5.442626953125, + "step": 1594 + }, + { + "epoch": 0.45, + "logps_train/chosen": -66.59737396240234, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -106.57050323486328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4847373962402344, + "rewards_train/margins": -0.37768709659576416, + "rewards_train/rejected": -1.1070502996444702, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -87.649658203125, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -145.46694946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3649659156799316, + "rewards_train/margins": 4.8817291259765625, + "rewards_train/rejected": -7.246695041656494, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -29.38066291809082, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -68.12169647216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.575566291809082, + "rewards_train/margins": 2.936603307723999, + "rewards_train/rejected": -3.512169599533081, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -37.633888244628906, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -28.66582489013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1883888244628906, + "rewards_train/margins": 1.1156938076019287, + "rewards_train/rejected": -2.3040826320648193, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -108.00765991210938, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -308.2388000488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6007659435272217, + "rewards_train/margins": 12.42311406135559, + "rewards_train/rejected": -15.023880004882812, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -201.2236328125, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -319.6623229980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.022363662719727, + "rewards_train/margins": 6.843869209289551, + "rewards_train/rejected": -14.866232872009277, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -20.827329635620117, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -34.7580451965332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7967954874038696, + "rewards_train/margins": 0.7852591276168823, + "rewards_train/rejected": -2.582054615020752, + "step": 1595 + }, + { + "epoch": 0.45, + "logps_train/chosen": -79.43809509277344, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -135.22540283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6438094973564148, + "rewards_train/margins": 2.5287309288978577, + "rewards_train/rejected": -3.1725404262542725, + "step": 1595 + }, + { + "epoch": 0.45, + "learning_rate": 6.337209995683865e-07, + "loss": 0.2471, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -77.26898193359375, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -80.64765930175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27689820528030396, + "rewards_train/margins": 0.037867724895477295, + "rewards_train/rejected": -0.31476593017578125, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -82.2003402709961, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -107.28570556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.920034170150757, + "rewards_train/margins": 0.3585364818572998, + "rewards_train/rejected": -4.278570652008057, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -12.68636703491211, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -23.676834106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4436367154121399, + "rewards_train/margins": 0.5990467667579651, + "rewards_train/rejected": -1.042683482170105, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -63.940528869628906, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -121.2803726196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3690528869628906, + "rewards_train/margins": 2.258984327316284, + "rewards_train/rejected": -2.628037214279175, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -11.336067199707031, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -7.299274444580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22110672295093536, + "rewards_train/margins": 0.26350821554660797, + "rewards_train/rejected": -0.48461493849754333, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -91.82979583740234, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -141.0370635986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1329796314239502, + "rewards_train/margins": 5.320726633071899, + "rewards_train/rejected": -6.45370626449585, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -39.43861389160156, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -48.75745391845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.90636146068573, + "rewards_train/margins": 1.131883978843689, + "rewards_train/rejected": -3.038245439529419, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -166.11647033691406, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -265.3802490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.461647033691406, + "rewards_train/margins": 5.376378059387207, + "rewards_train/rejected": -10.838025093078613, + "step": 1596 + }, + { + "epoch": 0.45, + "logps_train/chosen": -70.55050659179688, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -33.534423828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8675506114959717, + "rewards_train/margins": -1.0734832286834717, + "rewards_train/rejected": -2.7940673828125, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -1.6723968982696533, + "logps_train/ref_chosen": -0.59375, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -25.22583770751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10786469280719757, + "rewards_train/margins": 1.6772190779447556, + "rewards_train/rejected": -1.7850837707519531, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -18.07179832458496, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -21.28933334350586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3446798324584961, + "rewards_train/margins": 0.8030035495758057, + "rewards_train/rejected": -1.1476833820343018, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -116.33534240722656, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -193.01904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.033534288406372, + "rewards_train/margins": 6.418370008468628, + "rewards_train/rejected": -7.451904296875, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -151.39288330078125, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -159.68014526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.539288282394409, + "rewards_train/margins": 3.8787262439727783, + "rewards_train/rejected": -6.4180145263671875, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -110.23330688476562, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -310.0, + "logps_train/rejected": -364.4263000488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8733307123184204, + "rewards_train/margins": 4.569299578666687, + "rewards_train/rejected": -5.442630290985107, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -131.6636962890625, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -168.39463806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.46636962890625, + "rewards_train/margins": 1.3730943202972412, + "rewards_train/rejected": -3.839463949203491, + "step": 1597 + }, + { + "epoch": 0.45, + "logps_train/chosen": -131.83363342285156, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -124.65386962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7833633422851562, + "rewards_train/margins": -0.41797637939453125, + "rewards_train/rejected": -1.365386962890625, + "step": 1597 + }, + { + "epoch": 0.45, + "learning_rate": 6.312605849326065e-07, + "loss": 0.3558, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -106.53057861328125, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -106.96757507324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15305785834789276, + "rewards_train/margins": 0.04369965195655823, + "rewards_train/rejected": -0.196757510304451, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -95.98031616210938, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -206.83961486816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5480316281318665, + "rewards_train/margins": 4.335930049419403, + "rewards_train/rejected": -4.8839616775512695, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -64.5422134399414, + "logps_train/ref_chosen": -52.0, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -88.1150131225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2542213201522827, + "rewards_train/margins": 1.5822800397872925, + "rewards_train/rejected": -2.836501359939575, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -4.914507865905762, + "logps_train/ref_chosen": -1.9296875, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -23.568933486938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2984820306301117, + "rewards_train/margins": 1.6365363895893097, + "rewards_train/rejected": -1.9350184202194214, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -114.92476654052734, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -165.20376586914062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.692476749420166, + "rewards_train/margins": -0.47210001945495605, + "rewards_train/rejected": -3.22037672996521, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -111.14959716796875, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -143.34934997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7649598121643066, + "rewards_train/margins": 0.6199753284454346, + "rewards_train/rejected": -3.384935140609741, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -150.28213500976562, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -231.4990997314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5782134532928467, + "rewards_train/margins": 4.571696519851685, + "rewards_train/rejected": -7.149909973144531, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -18.399568557739258, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -45.91694259643555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0493319034576416, + "rewards_train/margins": 1.267362356185913, + "rewards_train/rejected": -2.3166942596435547, + "step": 1598 + }, + { + "epoch": 0.45, + "logps_train/chosen": -29.73174285888672, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -31.793046951293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7606742978096008, + "rewards_train/margins": 1.6998805403709412, + "rewards_train/rejected": -2.460554838180542, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -19.749536514282227, + "logps_train/ref_chosen": -3.453125, + "logps_train/ref_rejected": -1.375, + "logps_train/rejected": -20.560222625732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6296411752700806, + "rewards_train/margins": 0.2888810634613037, + "rewards_train/rejected": -1.9185222387313843, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -14.89670181274414, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -31.779325485229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0615452527999878, + "rewards_train/margins": 0.47888731956481934, + "rewards_train/rejected": -1.5404325723648071, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -20.876293182373047, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -12.805174827575684, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3157542943954468, + "rewards_train/margins": -0.2883617877960205, + "rewards_train/rejected": -1.0273925065994263, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -16.821807861328125, + "logps_train/ref_chosen": -0.494140625, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -28.243480682373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6327667236328125, + "rewards_train/margins": 0.7259564399719238, + "rewards_train/rejected": -2.3587231636047363, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -27.683815002441406, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -24.63804054260254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9308815002441406, + "rewards_train/margins": 0.2954225540161133, + "rewards_train/rejected": -1.226304054260254, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -143.5223388671875, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -178.95310974121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.85223388671875, + "rewards_train/margins": 0.9430770874023438, + "rewards_train/rejected": -3.7953109741210938, + "step": 1599 + }, + { + "epoch": 0.45, + "logps_train/chosen": -125.72911071777344, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -134.2405242919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.322911024093628, + "rewards_train/margins": 2.0511415004730225, + "rewards_train/rejected": -5.37405252456665, + "step": 1599 + }, + { + "epoch": 0.45, + "learning_rate": 6.288027510817791e-07, + "loss": 0.3846, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -193.67025756835938, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -194.92181396484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.117025852203369, + "rewards_train/margins": 1.4251561164855957, + "rewards_train/rejected": -8.542181968688965, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -10.466418266296387, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -0.6875, + "logps_train/rejected": -16.858325958251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7950793504714966, + "rewards_train/margins": 0.8220032453536987, + "rewards_train/rejected": -1.6170825958251953, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -24.380874633789062, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -25.447593688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6755874752998352, + "rewards_train/margins": 0.3816719651222229, + "rewards_train/rejected": -1.057259440422058, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -6.790646553039551, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -33.63796615600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16468535363674164, + "rewards_train/margins": 2.184732064604759, + "rewards_train/rejected": -2.0200467109680176, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -27.295146942138672, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -41.0220832824707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.107639789581299, + "rewards_train/margins": 1.2758185863494873, + "rewards_train/rejected": -3.383458375930786, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -161.89053344726562, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -208.03402709960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.789053440093994, + "rewards_train/margins": 6.614349842071533, + "rewards_train/rejected": -10.403403282165527, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -15.684311866760254, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -0.6015625, + "logps_train/rejected": -6.8212738037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15593118965625763, + "rewards_train/margins": 0.4660399407148361, + "rewards_train/rejected": -0.6219711303710938, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -54.03169250488281, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -68.9880599975586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3281692266464233, + "rewards_train/margins": 1.5206369161605835, + "rewards_train/rejected": -2.848806142807007, + "step": 1600 + }, + { + "epoch": 0.45, + "logps_train/chosen": -46.043724060058594, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -55.03395080566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9918724298477173, + "rewards_train/margins": 1.386522650718689, + "rewards_train/rejected": -3.3783950805664062, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -161.38961791992188, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -237.78598022460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.288961887359619, + "rewards_train/margins": 1.6896367073059082, + "rewards_train/rejected": -8.978598594665527, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -173.48464965820312, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -174.89590454101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.298465251922607, + "rewards_train/margins": 0.6411252021789551, + "rewards_train/rejected": -5.9395904541015625, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -9.681379318237305, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -3.1875, + "logps_train/rejected": -16.074928283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7025129199028015, + "rewards_train/margins": 0.5862299799919128, + "rewards_train/rejected": -1.2887428998947144, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -39.01900100708008, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -41.038272857666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0831501483917236, + "rewards_train/margins": 0.4113020896911621, + "rewards_train/rejected": -3.4944522380828857, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -9.244382858276367, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -0.63671875, + "logps_train/rejected": -12.378056526184082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6713132858276367, + "rewards_train/margins": 0.5028204917907715, + "rewards_train/rejected": -1.1741337776184082, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -200.8717041015625, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -225.26248168945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.987170457839966, + "rewards_train/margins": 1.63907790184021, + "rewards_train/rejected": -4.626248359680176, + "step": 1601 + }, + { + "epoch": 0.45, + "logps_train/chosen": -160.1160125732422, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -185.81777954101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.211601257324219, + "rewards_train/margins": 2.070176601409912, + "rewards_train/rejected": -6.281777858734131, + "step": 1601 + }, + { + "epoch": 0.45, + "learning_rate": 6.263475152181361e-07, + "loss": 0.2922, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -83.19107818603516, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -163.1069793701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2191078662872314, + "rewards_train/margins": 4.341590166091919, + "rewards_train/rejected": -5.56069803237915, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -215.92233276367188, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -208.25286865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.9922332763671875, + "rewards_train/margins": 1.7330541610717773, + "rewards_train/rejected": -8.725287437438965, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -22.306529998779297, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -56.14396667480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43065300583839417, + "rewards_train/margins": 3.183743804693222, + "rewards_train/rejected": -3.614396810531616, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -137.20040893554688, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -88.9050064086914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5200408697128296, + "rewards_train/margins": -0.17954015731811523, + "rewards_train/rejected": -1.3405007123947144, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -16.025306701660156, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -54.04103088378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1837806701660156, + "rewards_train/margins": 3.485947608947754, + "rewards_train/rejected": -4.6697282791137695, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -9.57929515838623, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -20.967317581176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.526679515838623, + "rewards_train/margins": 0.3638022541999817, + "rewards_train/rejected": -0.8904817700386047, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -14.87447738647461, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -17.90801239013672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1077603101730347, + "rewards_train/margins": -0.4169590473175049, + "rewards_train/rejected": -0.6908012628555298, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -121.67019653320312, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -156.427490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.267019748687744, + "rewards_train/margins": 2.925729274749756, + "rewards_train/rejected": -6.1927490234375, + "step": 1602 + }, + { + "epoch": 0.45, + "logps_train/chosen": -1.7857015132904053, + "logps_train/ref_chosen": -0.7421875, + "logps_train/ref_rejected": -0.7421875, + "logps_train/rejected": -1.6817907094955444, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10435140132904053, + "rewards_train/margins": -0.010391078889369965, + "rewards_train/rejected": -0.09396032243967056, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -53.41638946533203, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -91.197509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11663895100355148, + "rewards_train/margins": 2.5031120255589485, + "rewards_train/rejected": -2.6197509765625, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -144.406494140625, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -221.8007354736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2406494617462158, + "rewards_train/margins": 7.039424657821655, + "rewards_train/rejected": -8.280074119567871, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -66.93975067138672, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -138.86306762695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8189750909805298, + "rewards_train/margins": 4.167331576347351, + "rewards_train/rejected": -4.986306667327881, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -20.692615509033203, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -35.439781188964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1005115509033203, + "rewards_train/margins": 1.105966567993164, + "rewards_train/rejected": -2.2064781188964844, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -88.38595581054688, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -217.5312042236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6385955810546875, + "rewards_train/margins": 7.614524841308594, + "rewards_train/rejected": -10.253120422363281, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -0.9455165863037109, + "logps_train/ref_chosen": -0.330078125, + "logps_train/ref_rejected": -1.90625, + "logps_train/rejected": -3.789194345474243, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06154384836554527, + "rewards_train/margins": 0.12675059214234352, + "rewards_train/rejected": -0.1882944405078888, + "step": 1603 + }, + { + "epoch": 0.45, + "logps_train/chosen": -31.431386947631836, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -61.75857162475586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7681387662887573, + "rewards_train/margins": 0.4077185392379761, + "rewards_train/rejected": -2.1758573055267334, + "step": 1603 + }, + { + "epoch": 0.45, + "learning_rate": 6.238948945257247e-07, + "loss": 0.2973, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -43.488914489746094, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -43.327293395996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.880141496658325, + "rewards_train/margins": -0.01616215705871582, + "rewards_train/rejected": -3.8639793395996094, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -146.03460693359375, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -99.80509948730469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.253460884094238, + "rewards_train/margins": -2.5729509592056274, + "rewards_train/rejected": -1.6805099248886108, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -39.940887451171875, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -25.8563175201416, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.669088840484619, + "rewards_train/margins": -0.5272071361541748, + "rewards_train/rejected": -2.1418817043304443, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -27.038564682006836, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -20.816368103027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22885647416114807, + "rewards_train/margins": 0.9652803838253021, + "rewards_train/rejected": -1.1941368579864502, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -12.955485343933105, + "logps_train/ref_chosen": -1.515625, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -65.18292236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1439861059188843, + "rewards_train/margins": 3.1618062257766724, + "rewards_train/rejected": -4.305792331695557, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -46.5809326171875, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -100.21019744873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.73309326171875, + "rewards_train/margins": 2.1879265308380127, + "rewards_train/rejected": -2.9210197925567627, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -21.972469329833984, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -17.14470863342285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9284969568252563, + "rewards_train/margins": 0.5156614780426025, + "rewards_train/rejected": -1.4441584348678589, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -81.42559051513672, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -106.57759094238281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5925590395927429, + "rewards_train/margins": -0.1847999393939972, + "rewards_train/rejected": -0.4077591001987457, + "step": 1604 + }, + { + "epoch": 0.45, + "logps_train/chosen": -123.63648223876953, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -155.23175048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6636483669281006, + "rewards_train/margins": 0.9095265865325928, + "rewards_train/rejected": -4.573174953460693, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -16.26009750366211, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -39.93555450439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40100976824760437, + "rewards_train/margins": 0.8800456821918488, + "rewards_train/rejected": -1.2810554504394531, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -9.69267749786377, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -1.5390625, + "logps_train/rejected": -9.495139122009277, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.815361499786377, + "rewards_train/margins": -0.01975381374359131, + "rewards_train/rejected": -0.7956076860427856, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -160.43603515625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -185.83383178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.493603706359863, + "rewards_train/margins": 2.389779567718506, + "rewards_train/rejected": -6.883383274078369, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -80.81271362304688, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -46.93475341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5812714099884033, + "rewards_train/margins": 0.5997040271759033, + "rewards_train/rejected": -2.1809754371643066, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -96.03482818603516, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -126.05145263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8534828424453735, + "rewards_train/margins": 2.4516624212265015, + "rewards_train/rejected": -3.305145263671875, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -19.226232528686523, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -37.499351501464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3944982290267944, + "rewards_train/margins": 0.9554370641708374, + "rewards_train/rejected": -2.349935293197632, + "step": 1605 + }, + { + "epoch": 0.45, + "logps_train/chosen": -152.39724731445312, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -245.65089416503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.539724826812744, + "rewards_train/margins": 8.775364398956299, + "rewards_train/rejected": -12.315089225769043, + "step": 1605 + }, + { + "epoch": 0.45, + "learning_rate": 6.214449061702898e-07, + "loss": 0.5241, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -180.80679321289062, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -202.0899200439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.9306793212890625, + "rewards_train/margins": 1.6283130645751953, + "rewards_train/rejected": -8.558992385864258, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -40.50192642211914, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -64.85897064208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40019264817237854, + "rewards_train/margins": 2.085704416036606, + "rewards_train/rejected": -2.4858970642089844, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -67.3464126586914, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -155.39901733398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3096413612365723, + "rewards_train/margins": 3.2302603721618652, + "rewards_train/rejected": -6.5399017333984375, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -14.700883865356445, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -51.28461456298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32008838653564453, + "rewards_train/margins": 2.6458730697631836, + "rewards_train/rejected": -2.965961456298828, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -32.182735443115234, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -43.198692321777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1932735443115234, + "rewards_train/margins": 1.6203458309173584, + "rewards_train/rejected": -2.813619375228882, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -19.120615005493164, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -12.376456260681152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1120615005493164, + "rewards_train/margins": -0.9244158715009689, + "rewards_train/rejected": -0.18764562904834747, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -25.640884399414062, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -35.35409927368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9015884399414062, + "rewards_train/margins": 0.5713214874267578, + "rewards_train/rejected": -1.472909927368164, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -151.20521545410156, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -212.95803833007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6705214977264404, + "rewards_train/margins": 4.42528223991394, + "rewards_train/rejected": -7.095803737640381, + "step": 1606 + }, + { + "epoch": 0.45, + "logps_train/chosen": -35.57769775390625, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -36.806541442871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.207769751548767, + "rewards_train/margins": 0.1978844404220581, + "rewards_train/rejected": -1.4056541919708252, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -154.4827880859375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -228.52943420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.79827880859375, + "rewards_train/margins": 3.954664707183838, + "rewards_train/rejected": -6.752943515777588, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -48.966697692871094, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -42.43348693847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3216697871685028, + "rewards_train/margins": 0.5966789424419403, + "rewards_train/rejected": -0.9183487296104431, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -278.052001953125, + "logps_train/ref_chosen": -214.0, + "logps_train/ref_rejected": -248.0, + "logps_train/rejected": -311.583984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.405200481414795, + "rewards_train/margins": -0.04680204391479492, + "rewards_train/rejected": -6.3583984375, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -106.20834350585938, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -106.60905456542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.020834445953369, + "rewards_train/margins": 0.04007101058959961, + "rewards_train/rejected": -3.0609054565429688, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -2.260779857635498, + "logps_train/ref_chosen": -0.41796875, + "logps_train/ref_rejected": -3.4375, + "logps_train/rejected": -11.434732437133789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1842811107635498, + "rewards_train/margins": 0.615442156791687, + "rewards_train/rejected": -0.7997232675552368, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -148.9182586669922, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -163.18243408203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.191825866699219, + "rewards_train/margins": -0.6235823631286621, + "rewards_train/rejected": -4.568243503570557, + "step": 1607 + }, + { + "epoch": 0.45, + "logps_train/chosen": -6.699605941772461, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -1.84375, + "logps_train/rejected": -37.6949462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3043355941772461, + "rewards_train/margins": 3.2807841300964355, + "rewards_train/rejected": -3.5851197242736816, + "step": 1607 + }, + { + "epoch": 0.45, + "learning_rate": 6.189975672991517e-07, + "loss": 0.3919, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -125.85704040527344, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -112.86418914794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.335704326629639, + "rewards_train/margins": 0.15071487426757812, + "rewards_train/rejected": -4.486419200897217, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -52.128318786621094, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -35.52924346923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8878319263458252, + "rewards_train/margins": 0.44634246826171875, + "rewards_train/rejected": -2.334174394607544, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -231.23789978027344, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -228.8961181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.323790073394775, + "rewards_train/margins": 2.4658217430114746, + "rewards_train/rejected": -7.78961181640625, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -25.76311683654785, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -14.330077171325684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3611883223056793, + "rewards_train/margins": 1.2848210632801056, + "rewards_train/rejected": -0.9236327409744263, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -133.3919219970703, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -125.5511245727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9391922354698181, + "rewards_train/margins": 3.365920126438141, + "rewards_train/rejected": -4.305112361907959, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -11.766998291015625, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -32.42184829711914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05794982984662056, + "rewards_train/margins": 0.6217349879443645, + "rewards_train/rejected": -0.6796848177909851, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -126.48040008544922, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -142.78463745117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8980400562286377, + "rewards_train/margins": -0.5695762634277344, + "rewards_train/rejected": -2.3284637928009033, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -15.427030563354492, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -59.41521453857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4739530682563782, + "rewards_train/margins": 1.7175683379173279, + "rewards_train/rejected": -2.191521406173706, + "step": 1608 + }, + { + "epoch": 0.45, + "logps_train/chosen": -31.320663452148438, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -31.416608810424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2070664167404175, + "rewards_train/margins": 1.4658445119857788, + "rewards_train/rejected": -2.6729109287261963, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -4.488080978393555, + "logps_train/ref_chosen": -0.71484375, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -11.799802780151367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.377323716878891, + "rewards_train/margins": -0.07234343886375427, + "rewards_train/rejected": -0.3049802780151367, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -57.844482421875, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -61.60548782348633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.171948194503784, + "rewards_train/margins": 1.7261006832122803, + "rewards_train/rejected": -4.8980488777160645, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -129.14193725585938, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -128.03009033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0141937732696533, + "rewards_train/margins": 1.0388152599334717, + "rewards_train/rejected": -2.053009033203125, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -19.636886596679688, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -26.684371948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7949386835098267, + "rewards_train/margins": 0.04849851131439209, + "rewards_train/rejected": -0.8434371948242188, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -31.565128326416016, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -51.29906463623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8690128326416016, + "rewards_train/margins": 1.548393726348877, + "rewards_train/rejected": -2.4174065589904785, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -17.404428482055664, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -25.031679153442383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5357553958892822, + "rewards_train/margins": 0.43928754329681396, + "rewards_train/rejected": -1.9750429391860962, + "step": 1609 + }, + { + "epoch": 0.45, + "logps_train/chosen": -25.890851974487305, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -37.000709533691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7640852332115173, + "rewards_train/margins": 2.5609858632087708, + "rewards_train/rejected": -3.325071096420288, + "step": 1609 + }, + { + "epoch": 0.45, + "learning_rate": 6.165528950410884e-07, + "loss": 0.3706, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -83.47589874267578, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -208.85629272460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3475898504257202, + "rewards_train/margins": 7.138039231300354, + "rewards_train/rejected": -8.485629081726074, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -18.843645095825195, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -35.13101577758789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6624895334243774, + "rewards_train/margins": 1.309987187385559, + "rewards_train/rejected": -2.9724767208099365, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -87.29508972167969, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -122.34906768798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5045089721679688, + "rewards_train/margins": 0.6303977966308594, + "rewards_train/rejected": -3.134906768798828, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -23.56234359741211, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -34.711368560791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7687343955039978, + "rewards_train/margins": 0.5399025082588196, + "rewards_train/rejected": -1.3086369037628174, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -128.74273681640625, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -186.622314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.574273586273193, + "rewards_train/margins": 3.5879578590393066, + "rewards_train/rejected": -8.1622314453125, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -134.21173095703125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -269.8579406738281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5211732387542725, + "rewards_train/margins": 10.564621210098267, + "rewards_train/rejected": -13.085794448852539, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -45.34056091308594, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -77.19793701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2090561389923096, + "rewards_train/margins": 3.648237466812134, + "rewards_train/rejected": -4.857293605804443, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -203.81967163085938, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -242.07266235351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.2819671630859375, + "rewards_train/margins": 0.42529916763305664, + "rewards_train/rejected": -7.707266330718994, + "step": 1610 + }, + { + "epoch": 0.45, + "logps_train/chosen": -79.77439880371094, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -245.35838317871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0024399757385254, + "rewards_train/margins": 7.533398151397705, + "rewards_train/rejected": -10.53583812713623, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -82.6295394897461, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -179.03515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9129539728164673, + "rewards_train/margins": 6.440561652183533, + "rewards_train/rejected": -7.353515625, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -0.019188998267054558, + "logps_train/ref_chosen": -0.09375, + "logps_train/ref_rejected": -0.09375, + "logps_train/rejected": -0.019186925143003464, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007456100080162287, + "rewards_train/margins": -2.0721927285194397e-07, + "rewards_train/rejected": 0.007456307299435139, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -30.298240661621094, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -48.92003631591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3266990184783936, + "rewards_train/margins": 1.7653048038482666, + "rewards_train/rejected": -4.09200382232666, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -6.8730549812316895, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -40.822750091552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4779305160045624, + "rewards_train/margins": 2.6043446362018585, + "rewards_train/rejected": -3.082275152206421, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -12.979735374450684, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -21.341156005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2229735404253006, + "rewards_train/margins": 1.098642036318779, + "rewards_train/rejected": -1.3216155767440796, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -9.879207611083984, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -29.44521141052246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7035457491874695, + "rewards_train/margins": 0.8659754395484924, + "rewards_train/rejected": -1.569521188735962, + "step": 1611 + }, + { + "epoch": 0.45, + "logps_train/chosen": -177.8641357421875, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -261.5433349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.88641357421875, + "rewards_train/margins": 4.26792049407959, + "rewards_train/rejected": -10.15433406829834, + "step": 1611 + }, + { + "epoch": 0.45, + "learning_rate": 6.141109065062135e-07, + "loss": 0.2036, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -7.3697099685668945, + "logps_train/ref_chosen": -1.6328125, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -42.62761688232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5736897587776184, + "rewards_train/margins": 3.032822072505951, + "rewards_train/rejected": -3.6065118312835693, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -8.727764129638672, + "logps_train/ref_chosen": -1.8046875, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -11.861031532287598, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6923076510429382, + "rewards_train/margins": -0.14995449781417847, + "rewards_train/rejected": -0.5423531532287598, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -106.50956726074219, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -248.77313232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.150956869125366, + "rewards_train/margins": 12.076356172561646, + "rewards_train/rejected": -15.227313041687012, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -113.79299926757812, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -137.49267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2792999744415283, + "rewards_train/margins": 2.1199676990509033, + "rewards_train/rejected": -5.399267673492432, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -162.01947021484375, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -188.71493530273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8019471168518066, + "rewards_train/margins": 2.969546318054199, + "rewards_train/rejected": -5.771493434906006, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -15.819351196289062, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -22.863605499267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7319351434707642, + "rewards_train/margins": 0.35442543029785156, + "rewards_train/rejected": -1.0863605737686157, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -35.074546813964844, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -32.434730529785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4574546813964844, + "rewards_train/margins": 0.43601834774017334, + "rewards_train/rejected": -1.8934730291366577, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -133.40594482421875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -220.17620849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7405946254730225, + "rewards_train/margins": 6.027026414871216, + "rewards_train/rejected": -9.767621040344238, + "step": 1612 + }, + { + "epoch": 0.45, + "logps_train/chosen": -164.1982421875, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -180.73675537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.06982421875, + "rewards_train/margins": 1.9038515090942383, + "rewards_train/rejected": -7.973675727844238, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -133.85552978515625, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -222.20672607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3855531215667725, + "rewards_train/margins": 5.435119867324829, + "rewards_train/rejected": -8.820672988891602, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -0.9180788993835449, + "logps_train/ref_chosen": -0.97265625, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -4.942108631134033, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005457735154777765, + "rewards_train/margins": 0.3777936161495745, + "rewards_train/rejected": -0.37233588099479675, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -119.02682495117188, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -193.60829162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1526825428009033, + "rewards_train/margins": 3.208146810531616, + "rewards_train/rejected": -5.3608293533325195, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -101.59554290771484, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -159.40768432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.009554386138916, + "rewards_train/margins": 1.331214189529419, + "rewards_train/rejected": -3.340768575668335, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -148.18807983398438, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -149.794677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1188080310821533, + "rewards_train/margins": 1.1106598377227783, + "rewards_train/rejected": -3.2294678688049316, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -104.72754669189453, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -95.4555892944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.222754716873169, + "rewards_train/margins": 0.1728043556213379, + "rewards_train/rejected": -2.395559072494507, + "step": 1613 + }, + { + "epoch": 0.45, + "logps_train/chosen": -211.76654052734375, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -225.23727416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.17665433883667, + "rewards_train/margins": 2.247073173522949, + "rewards_train/rejected": -6.423727512359619, + "step": 1613 + }, + { + "epoch": 0.45, + "learning_rate": 6.11671618785858e-07, + "loss": 0.2468, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -91.07925415039062, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -99.48652648925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5579254627227783, + "rewards_train/margins": -0.359272837638855, + "rewards_train/rejected": -1.1986526250839233, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -89.23507690429688, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -150.63287353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.148507833480835, + "rewards_train/margins": 2.014779806137085, + "rewards_train/rejected": -5.16328763961792, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -70.70587158203125, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -110.83240509033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5455873012542725, + "rewards_train/margins": 0.037653207778930664, + "rewards_train/rejected": -2.583240509033203, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -154.97662353515625, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -232.577880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.097662448883057, + "rewards_train/margins": 5.160126209259033, + "rewards_train/rejected": -9.25778865814209, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -6.362199783325195, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -1.578125, + "logps_train/rejected": -6.389403820037842, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40653249621391296, + "rewards_train/margins": 0.0745953917503357, + "rewards_train/rejected": -0.48112788796424866, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -85.9710693359375, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -96.7430419921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.297106981277466, + "rewards_train/margins": -0.5228027105331421, + "rewards_train/rejected": -1.7743042707443237, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -90.29986572265625, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -91.37564086914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2799866199493408, + "rewards_train/margins": 0.10757744312286377, + "rewards_train/rejected": -1.3875640630722046, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -125.13053894042969, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -197.95489501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7130539417266846, + "rewards_train/margins": 6.88243556022644, + "rewards_train/rejected": -10.595489501953125, + "step": 1614 + }, + { + "epoch": 0.45, + "logps_train/chosen": -142.7735137939453, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -203.51901245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.277351379394531, + "rewards_train/margins": 1.3745498657226562, + "rewards_train/rejected": -6.6519012451171875, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -25.282974243164062, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -27.59977912902832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4032974243164062, + "rewards_train/margins": 0.07543051242828369, + "rewards_train/rejected": -1.47872793674469, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -182.632080078125, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -216.63290405273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.863208055496216, + "rewards_train/margins": 3.50008225440979, + "rewards_train/rejected": -6.363290309906006, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -91.38304138183594, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -244.71884155273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8883041143417358, + "rewards_train/margins": 10.333580613136292, + "rewards_train/rejected": -12.221884727478027, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -31.556072235107422, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -83.37368774414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3431072235107422, + "rewards_train/margins": 4.644261837005615, + "rewards_train/rejected": -5.987369060516357, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -155.01068115234375, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -325.85113525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.901068210601807, + "rewards_train/margins": 11.484046459197998, + "rewards_train/rejected": -16.385114669799805, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -215.98593139648438, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -211.55882263183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.798593044281006, + "rewards_train/margins": 4.357289791107178, + "rewards_train/rejected": -10.155882835388184, + "step": 1615 + }, + { + "epoch": 0.45, + "logps_train/chosen": -8.802009582519531, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -42.42947006225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.051049042493104935, + "rewards_train/margins": 2.0314960964024067, + "rewards_train/rejected": -1.9804470539093018, + "step": 1615 + }, + { + "epoch": 0.45, + "learning_rate": 6.092350489524487e-07, + "loss": 0.3149, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -108.24636840820312, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -219.71701049804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9246368408203125, + "rewards_train/margins": 5.947064399719238, + "rewards_train/rejected": -7.871701240539551, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -23.04753875732422, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -45.30225372314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.323503851890564, + "rewards_train/margins": 1.5817216634750366, + "rewards_train/rejected": -2.9052255153656006, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -3.573211669921875, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -39.092872619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11142883449792862, + "rewards_train/margins": 2.0582160726189613, + "rewards_train/rejected": -1.9467872381210327, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -32.103981018066406, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -24.48508644104004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8603981733322144, + "rewards_train/margins": 0.01936054229736328, + "rewards_train/rejected": -1.8797587156295776, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -86.71821594238281, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -224.8300018310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9218215942382812, + "rewards_train/margins": 3.061178684234619, + "rewards_train/rejected": -3.9830002784729004, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -10.965775489807129, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -3.6875, + "logps_train/rejected": -8.958183288574219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6090775728225708, + "rewards_train/margins": -0.08200925588607788, + "rewards_train/rejected": -0.5270683169364929, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -23.588245391845703, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -21.162261962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5713245272636414, + "rewards_train/margins": 1.0074016451835632, + "rewards_train/rejected": -1.5787261724472046, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -29.39315414428711, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -21.704864501953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.647127866744995, + "rewards_train/margins": -0.8891414403915405, + "rewards_train/rejected": -1.7579864263534546, + "step": 1616 + }, + { + "epoch": 0.45, + "logps_train/chosen": -16.91243553161621, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -0.33984375, + "logps_train/rejected": -17.126523971557617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03375644609332085, + "rewards_train/margins": 1.7124244682490826, + "rewards_train/rejected": -1.6786680221557617, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -29.42646026611328, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -114.40701293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.986396074295044, + "rewards_train/margins": 1.2043051719665527, + "rewards_train/rejected": -3.1907012462615967, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -158.13458251953125, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -156.02069091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4134583473205566, + "rewards_train/margins": 2.6886110305786133, + "rewards_train/rejected": -5.10206937789917, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -27.876691818237305, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -19.107458114624023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1501692533493042, + "rewards_train/margins": 0.39338910579681396, + "rewards_train/rejected": -1.5435583591461182, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -22.842235565185547, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -38.2906494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0904735326766968, + "rewards_train/margins": 2.057341456413269, + "rewards_train/rejected": -3.147814989089966, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -174.55303955078125, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -199.41168212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7553040981292725, + "rewards_train/margins": 2.9858644008636475, + "rewards_train/rejected": -5.74116849899292, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -1.4941468238830566, + "logps_train/ref_chosen": -0.703125, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -4.4528727531433105, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07910218089818954, + "rewards_train/margins": -0.1369399055838585, + "rewards_train/rejected": 0.057837724685668945, + "step": 1617 + }, + { + "epoch": 0.45, + "logps_train/chosen": -76.437255859375, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -92.51475524902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3187255859375, + "rewards_train/margins": 2.3077499866485596, + "rewards_train/rejected": -3.6264755725860596, + "step": 1617 + }, + { + "epoch": 0.45, + "learning_rate": 6.068012140593921e-07, + "loss": 0.3349, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -21.827512741088867, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -39.2473030090332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1390012502670288, + "rewards_train/margins": 0.5607290267944336, + "rewards_train/rejected": -1.6997302770614624, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -113.7755126953125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -123.46943664550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.277551293373108, + "rewards_train/margins": 0.26939237117767334, + "rewards_train/rejected": -1.5469436645507812, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -48.67802810668945, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -37.037933349609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8178027868270874, + "rewards_train/margins": -0.8515094518661499, + "rewards_train/rejected": -0.9662933349609375, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -52.59062576293945, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -78.23259735107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8090626001358032, + "rewards_train/margins": 2.1141971349716187, + "rewards_train/rejected": -3.923259735107422, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -22.91474151611328, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -51.102088928222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0477241277694702, + "rewards_train/margins": 1.3999847173690796, + "rewards_train/rejected": -2.44770884513855, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -24.822715759277344, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -39.827945709228516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8541465997695923, + "rewards_train/margins": -0.4213520288467407, + "rewards_train/rejected": -1.4327945709228516, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -3.6991822719573975, + "logps_train/ref_chosen": -1.015625, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -11.401653289794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26835572719573975, + "rewards_train/margins": 0.40930962562561035, + "rewards_train/rejected": -0.6776653528213501, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -68.8438720703125, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -77.63111114501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.43438720703125, + "rewards_train/margins": 0.15372395515441895, + "rewards_train/rejected": -3.588111162185669, + "step": 1618 + }, + { + "epoch": 0.45, + "logps_train/chosen": -14.600534439086914, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -88.84105682373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8163034319877625, + "rewards_train/margins": 4.617802441120148, + "rewards_train/rejected": -5.43410587310791, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -3.9025211334228516, + "logps_train/ref_chosen": -1.4296875, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -8.352032661437988, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24728336930274963, + "rewards_train/margins": 0.1441698968410492, + "rewards_train/rejected": -0.39145326614379883, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -23.05880355834961, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -45.23413848876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.312130331993103, + "rewards_train/margins": 0.4112834930419922, + "rewards_train/rejected": -1.7234138250350952, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -99.20726013183594, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -125.29774475097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6207259893417358, + "rewards_train/margins": 0.4090484380722046, + "rewards_train/rejected": -2.0297744274139404, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -12.978044509887695, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -21.355073928833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2478044480085373, + "rewards_train/margins": 0.2127029448747635, + "rewards_train/rejected": -0.4605073928833008, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -21.73346710205078, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -28.292381286621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9545966982841492, + "rewards_train/margins": 0.22464150190353394, + "rewards_train/rejected": -1.179238200187683, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -12.338764190673828, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -19.907094955444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9213764071464539, + "rewards_train/margins": 0.17558306455612183, + "rewards_train/rejected": -1.0969594717025757, + "step": 1619 + }, + { + "epoch": 0.45, + "logps_train/chosen": -163.57455444335938, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -282.13995361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.307455539703369, + "rewards_train/margins": 8.906540393829346, + "rewards_train/rejected": -14.213995933532715, + "step": 1619 + }, + { + "epoch": 0.45, + "learning_rate": 6.04370131140952e-07, + "loss": 0.5034, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -6.491186141967773, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -12.077349662780762, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15088139474391937, + "rewards_train/margins": 0.533616378903389, + "rewards_train/rejected": -0.3827349841594696, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -36.409393310546875, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -75.9729995727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6784393191337585, + "rewards_train/margins": 1.4188607335090637, + "rewards_train/rejected": -2.0973000526428223, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -106.43904113769531, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -95.904296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4939041137695312, + "rewards_train/margins": -1.3034744262695312, + "rewards_train/rejected": -1.1904296875, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -92.59332275390625, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -173.15316772460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2093323469161987, + "rewards_train/margins": 2.9059845209121704, + "rewards_train/rejected": -4.115316867828369, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -170.61865234375, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -160.1494903564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.811865329742432, + "rewards_train/margins": 0.40308380126953125, + "rewards_train/rejected": -5.214949131011963, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -135.2791748046875, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -159.68222045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.977917432785034, + "rewards_train/margins": 3.2903048992156982, + "rewards_train/rejected": -7.268222332000732, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -174.97352600097656, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -179.97640991210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.0473527908325195, + "rewards_train/margins": -0.7997117042541504, + "rewards_train/rejected": -6.247641086578369, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -34.85835266113281, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -53.659271240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1233352422714233, + "rewards_train/margins": 0.3925919532775879, + "rewards_train/rejected": -1.5159271955490112, + "step": 1620 + }, + { + "epoch": 0.45, + "logps_train/chosen": -236.45652770996094, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -250.62722778320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.195652961730957, + "rewards_train/margins": 1.8170700073242188, + "rewards_train/rejected": -14.012722969055176, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -68.72674560546875, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -106.23355102539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.227325439453125, + "rewards_train/margins": 1.5506805181503296, + "rewards_train/rejected": -1.3233550786972046, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -139.85394287109375, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -172.23585510253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.435394287109375, + "rewards_train/margins": 2.338191509246826, + "rewards_train/rejected": -6.773585796356201, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -115.0391845703125, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -168.0039825439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6539185047149658, + "rewards_train/margins": 1.146479845046997, + "rewards_train/rejected": -2.800398349761963, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -32.346744537353516, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -17.550304412841797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5221744775772095, + "rewards_train/margins": -0.517143964767456, + "rewards_train/rejected": -1.0050305128097534, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -23.33710479736328, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -33.559993743896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.414960503578186, + "rewards_train/margins": 0.0410388708114624, + "rewards_train/rejected": -1.4559993743896484, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -30.03017807006836, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -29.26207733154297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6780178546905518, + "rewards_train/margins": -0.43931007385253906, + "rewards_train/rejected": -1.2387077808380127, + "step": 1621 + }, + { + "epoch": 0.45, + "logps_train/chosen": -167.4284210205078, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -167.23973083496094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.042842149734497, + "rewards_train/margins": -0.01886892318725586, + "rewards_train/rejected": -2.023973226547241, + "step": 1621 + }, + { + "epoch": 0.45, + "learning_rate": 6.019418172121305e-07, + "loss": 0.5324, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -119.62000274658203, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -186.84429931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7120002508163452, + "rewards_train/margins": 6.6224294900894165, + "rewards_train/rejected": -8.334429740905762, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -47.63905334472656, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -83.80946350097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.226405382156372, + "rewards_train/margins": 2.604541063308716, + "rewards_train/rejected": -4.830946445465088, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -108.85137176513672, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -92.0803451538086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.785137176513672, + "rewards_train/margins": -0.727102518081665, + "rewards_train/rejected": -2.058034658432007, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -197.99270629882812, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -208.4915771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.999270915985107, + "rewards_train/margins": 0.1498866081237793, + "rewards_train/rejected": -8.149157524108887, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -138.90721130371094, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -67.16946411132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2907211482524872, + "rewards_train/margins": 0.6762252748012543, + "rewards_train/rejected": -0.9669464230537415, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -150.94163513183594, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -162.44752502441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.994163513183594, + "rewards_train/margins": 2.6505889892578125, + "rewards_train/rejected": -7.644752502441406, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -142.3765869140625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -140.79981994628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9876587390899658, + "rewards_train/margins": 3.3423235416412354, + "rewards_train/rejected": -5.329982280731201, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -89.76502990722656, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -204.3141326904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.92650306224823, + "rewards_train/margins": 6.904910206794739, + "rewards_train/rejected": -8.831413269042969, + "step": 1622 + }, + { + "epoch": 0.45, + "logps_train/chosen": -50.002628326416016, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -67.74368286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.675262928009033, + "rewards_train/margins": 0.7116055488586426, + "rewards_train/rejected": -4.386868476867676, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -211.8177490234375, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -335.4328918457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.731775283813477, + "rewards_train/margins": 5.9115142822265625, + "rewards_train/rejected": -16.64328956604004, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -81.0702133178711, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -87.52871704101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8570213317871094, + "rewards_train/margins": 0.645850419998169, + "rewards_train/rejected": -1.5028717517852783, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -20.376251220703125, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -9.519214630126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2907501459121704, + "rewards_train/margins": -0.9294536709785461, + "rewards_train/rejected": -0.36129647493362427, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -10.094182014465332, + "logps_train/ref_chosen": -1.328125, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -37.23088455200195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8766056895256042, + "rewards_train/margins": 1.5464828610420227, + "rewards_train/rejected": -2.423088550567627, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -196.67568969726562, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -207.86135864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.017569065093994, + "rewards_train/margins": 0.46856689453125, + "rewards_train/rejected": -7.486135959625244, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -196.24908447265625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -220.0, + "logps_train/rejected": -285.180908203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.524908542633057, + "rewards_train/margins": -0.006817817687988281, + "rewards_train/rejected": -6.518090724945068, + "step": 1623 + }, + { + "epoch": 0.45, + "logps_train/chosen": -106.89849090576172, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -246.9510955810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7898491024971008, + "rewards_train/margins": 7.505260646343231, + "rewards_train/rejected": -8.295109748840332, + "step": 1623 + }, + { + "epoch": 0.45, + "learning_rate": 5.995162892685508e-07, + "loss": 0.362, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -150.857177734375, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -220.2568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.285717725753784, + "rewards_train/margins": 3.339966058731079, + "rewards_train/rejected": -5.625683784484863, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -4.76904821395874, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -1.3828125, + "logps_train/rejected": -33.38417053222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14096732437610626, + "rewards_train/margins": 3.0591686218976974, + "rewards_train/rejected": -3.2001359462738037, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -26.838956832885742, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -60.77676010131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6088957190513611, + "rewards_train/margins": 4.537530481815338, + "rewards_train/rejected": -5.146426200866699, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -159.53643798828125, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -156.4225311279297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.203643798828125, + "rewards_train/margins": -0.26139068603515625, + "rewards_train/rejected": -4.942253112792969, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -33.43865203857422, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -47.572147369384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.693865180015564, + "rewards_train/margins": 0.4133495092391968, + "rewards_train/rejected": -2.1072146892547607, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -85.57733154296875, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -125.1336669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.207733154296875, + "rewards_train/margins": 0.505633533000946, + "rewards_train/rejected": -0.713366687297821, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -147.973388671875, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -168.4642791748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9973388910293579, + "rewards_train/margins": 3.399089217185974, + "rewards_train/rejected": -4.396428108215332, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -44.76677703857422, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -36.34581756591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.051677703857422, + "rewards_train/margins": 0.7922790050506592, + "rewards_train/rejected": -2.843956708908081, + "step": 1624 + }, + { + "epoch": 0.45, + "logps_train/chosen": -176.83834838867188, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -255.49270629882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.183835029602051, + "rewards_train/margins": 5.565435409545898, + "rewards_train/rejected": -9.74927043914795, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -10.600724220275879, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -23.756057739257812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.34444743394851685, + "rewards_train/margins": -0.09384164214134216, + "rewards_train/rejected": -0.2506057918071747, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -11.803587913513184, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -0.8515625, + "logps_train/rejected": -21.18428611755371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3616088032722473, + "rewards_train/margins": 1.6716637015342712, + "rewards_train/rejected": -2.0332725048065186, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -145.91188049316406, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -215.09365844726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0411880016326904, + "rewards_train/margins": 4.968178033828735, + "rewards_train/rejected": -8.009366035461426, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -174.03485107421875, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -170.15325927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.303485155105591, + "rewards_train/margins": 0.9118409156799316, + "rewards_train/rejected": -3.2153260707855225, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -72.00511169433594, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -58.92942428588867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6255111694335938, + "rewards_train/margins": 0.6549313068389893, + "rewards_train/rejected": -3.280442476272583, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -182.12216186523438, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -215.67437744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.312216281890869, + "rewards_train/margins": 2.655221462249756, + "rewards_train/rejected": -4.967437744140625, + "step": 1625 + }, + { + "epoch": 0.45, + "logps_train/chosen": -53.59626770019531, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -73.83521270751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2596267461776733, + "rewards_train/margins": 0.4238945245742798, + "rewards_train/rejected": -1.6835212707519531, + "step": 1625 + }, + { + "epoch": 0.45, + "learning_rate": 5.970935642863374e-07, + "loss": 0.2851, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -10.948806762695312, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -24.459941864013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5292556881904602, + "rewards_train/margins": 1.6495510935783386, + "rewards_train/rejected": -2.178806781768799, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -62.808929443359375, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -100.89591979980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4558929204940796, + "rewards_train/margins": 3.958699345588684, + "rewards_train/rejected": -5.414592266082764, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -96.13774108886719, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -107.05947875976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.863774061203003, + "rewards_train/margins": -0.05782604217529297, + "rewards_train/rejected": -2.80594801902771, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -7.731978416442871, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -30.863666534423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19507284462451935, + "rewards_train/margins": 1.991293951869011, + "rewards_train/rejected": -2.1863667964935303, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -26.06656837463379, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -33.71014404296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.931656837463379, + "rewards_train/margins": -0.22314238548278809, + "rewards_train/rejected": -1.7085144519805908, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -86.53977966308594, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -181.11251831054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0039780139923096, + "rewards_train/margins": 6.057273626327515, + "rewards_train/rejected": -8.061251640319824, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -10.927526473999023, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -31.991552352905273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5072473883628845, + "rewards_train/margins": 1.2064026594161987, + "rewards_train/rejected": -0.6991552710533142, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -31.632522583007812, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -118.1128921508789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.313252329826355, + "rewards_train/margins": 1.0980368852615356, + "rewards_train/rejected": -2.4112892150878906, + "step": 1626 + }, + { + "epoch": 0.45, + "logps_train/chosen": -134.62437438964844, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -183.47537231445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.212437391281128, + "rewards_train/margins": 1.035099983215332, + "rewards_train/rejected": -3.24753737449646, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -53.25250244140625, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -51.046546936035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2002503871917725, + "rewards_train/margins": 0.7294044494628906, + "rewards_train/rejected": -3.929654836654663, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -16.28963851928711, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -34.214508056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.222713828086853, + "rewards_train/margins": 1.8424869775772095, + "rewards_train/rejected": -3.0652008056640625, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -7.30704927444458, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -1.3515625, + "logps_train/rejected": -14.179641723632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44320493936538696, + "rewards_train/margins": 0.8396030068397522, + "rewards_train/rejected": -1.2828079462051392, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -82.08197021484375, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -1.8359375, + "logps_train/rejected": -19.238203048706055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.39180299639701843, + "rewards_train/margins": 2.1320296227931976, + "rewards_train/rejected": -1.7402266263961792, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -159.96043395996094, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -183.14047241210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7960433959960938, + "rewards_train/margins": 0.31800413131713867, + "rewards_train/rejected": -4.114047527313232, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -89.6580581665039, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -96.4902114868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0158058404922485, + "rewards_train/margins": 1.6832152605056763, + "rewards_train/rejected": -2.699021100997925, + "step": 1627 + }, + { + "epoch": 0.45, + "logps_train/chosen": -12.866793632507324, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -1.5625, + "logps_train/rejected": -17.879566192626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6523043513298035, + "rewards_train/margins": 0.9794022440910339, + "rewards_train/rejected": -1.6317065954208374, + "step": 1627 + }, + { + "epoch": 0.46, + "learning_rate": 5.94673659221996e-07, + "loss": 0.2976, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -305.18792724609375, + "logps_train/ref_chosen": -250.0, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -65.11735534667969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.518792629241943, + "rewards_train/margins": -4.0820571184158325, + "rewards_train/rejected": -1.4367355108261108, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -202.06320190429688, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -294.4990539550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.3063201904296875, + "rewards_train/margins": 4.643585205078125, + "rewards_train/rejected": -10.949905395507812, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -29.620075225830078, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -52.36636734008789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13700751960277557, + "rewards_train/margins": 1.5496292859315872, + "rewards_train/rejected": -1.6866368055343628, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -50.87716293334961, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -43.903804779052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.718966245651245, + "rewards_train/margins": 0.002664327621459961, + "rewards_train/rejected": -3.721630573272705, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -116.26404571533203, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -101.62680053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1264045685529709, + "rewards_train/margins": 1.4362755566835403, + "rewards_train/rejected": -1.5626801252365112, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -303.92620849609375, + "logps_train/ref_chosen": -234.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -243.06346130371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.992620944976807, + "rewards_train/margins": 0.813725471496582, + "rewards_train/rejected": -7.806346416473389, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -123.93247985839844, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -170.81195068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6932480335235596, + "rewards_train/margins": 6.687947034835815, + "rewards_train/rejected": -8.381195068359375, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -19.82071304321289, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -40.783416748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.869571328163147, + "rewards_train/margins": 1.546270489692688, + "rewards_train/rejected": -2.415841817855835, + "step": 1628 + }, + { + "epoch": 0.46, + "logps_train/chosen": -77.06383514404297, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -77.19401550292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.581383466720581, + "rewards_train/margins": 0.013018131256103516, + "rewards_train/rejected": -2.5944015979766846, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -18.340702056884766, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -16.338266372680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27157020568847656, + "rewards_train/margins": 0.5435064435005188, + "rewards_train/rejected": -0.8150766491889954, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -192.2804718017578, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -189.8911590576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.278047561645508, + "rewards_train/margins": 0.1110687255859375, + "rewards_train/rejected": -9.389116287231445, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -179.13088989257812, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -248.43624877929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.313088893890381, + "rewards_train/margins": 6.03053617477417, + "rewards_train/rejected": -11.34362506866455, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -9.514236450195312, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -18.825902938842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5654861330986023, + "rewards_train/margins": 0.542104184627533, + "rewards_train/rejected": -1.1075903177261353, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -33.14561462402344, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -28.77783966064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.564561605453491, + "rewards_train/margins": -0.13052749633789062, + "rewards_train/rejected": -2.4340341091156006, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.960258483886719, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -34.03232955932617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9772758483886719, + "rewards_train/margins": 0.7259571552276611, + "rewards_train/rejected": -1.703233003616333, + "step": 1629 + }, + { + "epoch": 0.46, + "logps_train/chosen": -153.35504150390625, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -159.05877685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.435504198074341, + "rewards_train/margins": 2.120373487472534, + "rewards_train/rejected": -5.555877685546875, + "step": 1629 + }, + { + "epoch": 0.46, + "learning_rate": 5.922565910122966e-07, + "loss": 0.5816, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -66.7575912475586, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -116.54414367675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6257591247558594, + "rewards_train/margins": 1.2786552906036377, + "rewards_train/rejected": -2.904414415359497, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -104.90974426269531, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -200.4665985107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2909743785858154, + "rewards_train/margins": 6.05568528175354, + "rewards_train/rejected": -8.346659660339355, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -114.00995635986328, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -132.18812561035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3509957790374756, + "rewards_train/margins": 0.11781692504882812, + "rewards_train/rejected": -2.4688127040863037, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -115.02153015136719, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -294.6029052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9521530866622925, + "rewards_train/margins": 9.808138012886047, + "rewards_train/rejected": -11.76029109954834, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -121.073486328125, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -230.89389038085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0573487281799316, + "rewards_train/margins": 8.482040882110596, + "rewards_train/rejected": -10.539389610290527, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -122.8288803100586, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -210.44845581054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6828880310058594, + "rewards_train/margins": 5.661957740783691, + "rewards_train/rejected": -9.34484577178955, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -13.119122505187988, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -57.280372619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36816224455833435, + "rewards_train/margins": 0.20987501740455627, + "rewards_train/rejected": -0.5780372619628906, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -157.74807739257812, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -195.20169067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.674808025360107, + "rewards_train/margins": 2.045361042022705, + "rewards_train/rejected": -6.7201690673828125, + "step": 1630 + }, + { + "epoch": 0.46, + "logps_train/chosen": -42.809425354003906, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -48.80546188354492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2121925354003906, + "rewards_train/margins": 0.5496037006378174, + "rewards_train/rejected": -3.761796236038208, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -58.18735885620117, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -96.9660873413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.668735921382904, + "rewards_train/margins": 1.8778727650642395, + "rewards_train/rejected": -2.5466086864471436, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -129.8406982421875, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -181.93707275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.73406982421875, + "rewards_train/margins": 4.559637546539307, + "rewards_train/rejected": -6.293707370758057, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -10.584017753601074, + "logps_train/ref_chosen": -0.859375, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -9.304044723510742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9724642634391785, + "rewards_train/margins": -0.38112229108810425, + "rewards_train/rejected": -0.5913419723510742, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -134.06224060058594, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -171.47604370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8562240600585938, + "rewards_train/margins": 1.191380500793457, + "rewards_train/rejected": -5.047604560852051, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -164.72216796875, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -165.15493774414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.872216820716858, + "rewards_train/margins": 0.04327702522277832, + "rewards_train/rejected": -1.9154938459396362, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -104.75080871582031, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -95.24057006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4250808656215668, + "rewards_train/margins": 2.4989761412143707, + "rewards_train/rejected": -2.9240570068359375, + "step": 1631 + }, + { + "epoch": 0.46, + "logps_train/chosen": -6.778891563415527, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -2.5625, + "logps_train/rejected": -20.285228729248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43257665634155273, + "rewards_train/margins": 1.3396962881088257, + "rewards_train/rejected": -1.7722729444503784, + "step": 1631 + }, + { + "epoch": 0.46, + "learning_rate": 5.898423765741536e-07, + "loss": 0.2724, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -13.250142097473145, + "logps_train/ref_chosen": -1.8515625, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -19.362934112548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1398580074310303, + "rewards_train/margins": 0.39643537998199463, + "rewards_train/rejected": -1.536293387413025, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -84.12185668945312, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -46.534461975097656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0871856212615967, + "rewards_train/margins": -0.9087393283843994, + "rewards_train/rejected": -2.1784462928771973, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -22.82281494140625, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -44.03582763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5447815656661987, + "rewards_train/margins": 0.8588012456893921, + "rewards_train/rejected": -2.403582811355591, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -53.27163314819336, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -48.91661071777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7228366732597351, + "rewards_train/margins": 0.48949773609638214, + "rewards_train/rejected": 0.23333893716335297, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -10.855406761169434, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -37.01850891113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.32070931792259216, + "rewards_train/margins": 1.1975602209568024, + "rewards_train/rejected": -0.8768509030342102, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -245.90489196777344, + "logps_train/ref_chosen": -211.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -217.53311157226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4904892444610596, + "rewards_train/margins": 2.8628218173980713, + "rewards_train/rejected": -6.353311061859131, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -22.619720458984375, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -19.851966857910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4744720458984375, + "rewards_train/margins": -0.25490033626556396, + "rewards_train/rejected": -1.2195717096328735, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -42.93240737915039, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -49.995811462402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1807407140731812, + "rewards_train/margins": 0.11884045600891113, + "rewards_train/rejected": -1.2995811700820923, + "step": 1632 + }, + { + "epoch": 0.46, + "logps_train/chosen": -23.351425170898438, + "logps_train/ref_chosen": -1.203125, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -38.253379821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.214830160140991, + "rewards_train/margins": 0.08550786972045898, + "rewards_train/rejected": -2.30033802986145, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -18.425016403198242, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -16.314655303955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41125163435935974, + "rewards_train/margins": 0.7452138960361481, + "rewards_train/rejected": -1.1564655303955078, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -129.04049682617188, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -181.123291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5040496587753296, + "rewards_train/margins": 5.558279633522034, + "rewards_train/rejected": -7.062329292297363, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -113.38230895996094, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -171.41293334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08823090046644211, + "rewards_train/margins": 0.3530624285340309, + "rewards_train/rejected": -0.441293329000473, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -8.895624160766602, + "logps_train/ref_chosen": -0.828125, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -13.18971061706543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8067499399185181, + "rewards_train/margins": 0.018471121788024902, + "rewards_train/rejected": -0.825221061706543, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -102.20758056640625, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -101.77053833007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0707581043243408, + "rewards_train/margins": -0.04370427131652832, + "rewards_train/rejected": -1.0270538330078125, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -132.30075073242188, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -171.1531219482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.630075216293335, + "rewards_train/margins": 3.485236883163452, + "rewards_train/rejected": -7.115312099456787, + "step": 1633 + }, + { + "epoch": 0.46, + "logps_train/chosen": -19.871034622192383, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -29.89841651916504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6105409860610962, + "rewards_train/margins": 0.5043007135391235, + "rewards_train/rejected": -2.1148416996002197, + "step": 1633 + }, + { + "epoch": 0.46, + "learning_rate": 5.874310328045088e-07, + "loss": 0.491, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -117.75921630859375, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -181.61770629882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.375921607017517, + "rewards_train/margins": 5.085848927497864, + "rewards_train/rejected": -6.461770534515381, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -9.18487548828125, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -7.639495372772217, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.31901246309280396, + "rewards_train/margins": 0.8392120003700256, + "rewards_train/rejected": -0.5201995372772217, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -96.76656341552734, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -134.2252960205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.151656627655029, + "rewards_train/margins": 3.3958730697631836, + "rewards_train/rejected": -7.547529697418213, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -7.9670729637146, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -12.902523040771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4013948142528534, + "rewards_train/margins": 0.2669825255870819, + "rewards_train/rejected": -0.6683773398399353, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -61.238887786865234, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -226.33645629882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5238888263702393, + "rewards_train/margins": 9.509756803512573, + "rewards_train/rejected": -12.033645629882812, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -111.25785827636719, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -117.86174774169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7757858037948608, + "rewards_train/margins": 0.21038901805877686, + "rewards_train/rejected": -1.9861748218536377, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -6.357089996337891, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -11.259511947631836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48024025559425354, + "rewards_train/margins": 0.10821095108985901, + "rewards_train/rejected": -0.5884512066841125, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -11.78541088104248, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -46.750885009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.731666088104248, + "rewards_train/margins": 3.274672508239746, + "rewards_train/rejected": -4.006338596343994, + "step": 1634 + }, + { + "epoch": 0.46, + "logps_train/chosen": -79.46200561523438, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -105.24008178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6462005972862244, + "rewards_train/margins": 0.7778076529502869, + "rewards_train/rejected": -1.4240082502365112, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -173.2625732421875, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -151.2120819091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.626257419586182, + "rewards_train/margins": 1.2949509620666504, + "rewards_train/rejected": -6.921208381652832, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -129.69195556640625, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -161.94760131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.769195556640625, + "rewards_train/margins": 2.925564765930176, + "rewards_train/rejected": -4.694760322570801, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -57.84388732910156, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -78.17601013183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04061126708984375, + "rewards_train/margins": 0.7582122683525085, + "rewards_train/rejected": -0.7176010012626648, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -117.11076354980469, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -189.32077026367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9110764265060425, + "rewards_train/margins": 3.0210005044937134, + "rewards_train/rejected": -4.932076930999756, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -80.64488220214844, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -226.24546813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.064488172531128, + "rewards_train/margins": 3.860058546066284, + "rewards_train/rejected": -5.924546718597412, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -192.84432983398438, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -163.07827758789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.634433269500732, + "rewards_train/margins": -1.2766056060791016, + "rewards_train/rejected": -6.357827663421631, + "step": 1635 + }, + { + "epoch": 0.46, + "logps_train/chosen": -52.942283630371094, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -1.8515625, + "logps_train/rejected": -39.481266021728516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.812978506088257, + "rewards_train/margins": -0.05000805854797363, + "rewards_train/rejected": -3.762970447540283, + "step": 1635 + }, + { + "epoch": 0.46, + "learning_rate": 5.850225765802121e-07, + "loss": 0.3503, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -103.21856689453125, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -63.318389892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02185668982565403, + "rewards_train/margins": 0.20998229645192623, + "rewards_train/rejected": -0.23183898627758026, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -108.64952087402344, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -150.613525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6149520874023438, + "rewards_train/margins": 5.1964006423950195, + "rewards_train/rejected": -6.811352729797363, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -215.63853454589844, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -218.0455322265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.763853549957275, + "rewards_train/margins": -0.7593002319335938, + "rewards_train/rejected": -6.004553318023682, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -13.411651611328125, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -9.929145812988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4911651611328125, + "rewards_train/margins": 0.007999420166015625, + "rewards_train/rejected": -0.4991645812988281, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -15.677024841308594, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -44.029720306396484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2739524841308594, + "rewards_train/margins": 1.7165195941925049, + "rewards_train/rejected": -1.9904720783233643, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -146.0277862548828, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -122.18067169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.602778673171997, + "rewards_train/margins": 2.865288496017456, + "rewards_train/rejected": -4.468067169189453, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -133.27352905273438, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -177.37078857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9273529052734375, + "rewards_train/margins": 2.959725856781006, + "rewards_train/rejected": -6.887078762054443, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -1.0053843259811401, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -25.499618530273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03305531665682793, + "rewards_train/margins": 1.6767672412097454, + "rewards_train/rejected": -1.6437119245529175, + "step": 1636 + }, + { + "epoch": 0.46, + "logps_train/chosen": -13.900211334228516, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -18.150840759277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46502113342285156, + "rewards_train/margins": 0.9375629425048828, + "rewards_train/rejected": -1.4025840759277344, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -114.61193084716797, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -114.17987060546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21119308471679688, + "rewards_train/margins": -0.043206021189689636, + "rewards_train/rejected": -0.16798706352710724, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -32.9581413269043, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -27.25227928161621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6458141207695007, + "rewards_train/margins": 1.2106638550758362, + "rewards_train/rejected": -1.856477975845337, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -31.15380096435547, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -27.47329330444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.065380096435547, + "rewards_train/margins": 0.28819918632507324, + "rewards_train/rejected": -2.35357928276062, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -24.099103927612305, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -0.89453125, + "logps_train/rejected": -25.979726791381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0489728450775146, + "rewards_train/margins": 0.4595468044281006, + "rewards_train/rejected": -2.5085196495056152, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -58.37028503417969, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -57.630592346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5620285272598267, + "rewards_train/margins": 0.25103068351745605, + "rewards_train/rejected": -1.8130592107772827, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -37.61334228515625, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -1.53125, + "logps_train/rejected": -20.4316463470459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.023834228515625, + "rewards_train/margins": 0.8662054538726807, + "rewards_train/rejected": -1.8900396823883057, + "step": 1637 + }, + { + "epoch": 0.46, + "logps_train/chosen": -75.55089569091797, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -177.22801208496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1949104368686676, + "rewards_train/margins": 8.217711836099625, + "rewards_train/rejected": -8.022801399230957, + "step": 1637 + }, + { + "epoch": 0.46, + "learning_rate": 5.826170247579034e-07, + "loss": 0.3848, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -10.874312400817871, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -37.08595275878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3280562460422516, + "rewards_train/margins": 0.6305390298366547, + "rewards_train/rejected": -0.9585952758789062, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -13.081427574157715, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -3.75, + "logps_train/rejected": -8.451271057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2706427574157715, + "rewards_train/margins": 0.19948434829711914, + "rewards_train/rejected": -0.4701271057128906, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -151.4461669921875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -209.98648071289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.594616651535034, + "rewards_train/margins": 5.704031229019165, + "rewards_train/rejected": -8.2986478805542, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -21.392505645751953, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -2.484375, + "logps_train/rejected": -15.878396034240723, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.889250636100769, + "rewards_train/margins": -0.5498485565185547, + "rewards_train/rejected": -1.3394020795822144, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -169.522705078125, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -199.19647216796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.802270412445068, + "rewards_train/margins": -1.4826231002807617, + "rewards_train/rejected": -4.319647312164307, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -188.768310546875, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -215.97540283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.576831102371216, + "rewards_train/margins": 1.1207091808319092, + "rewards_train/rejected": -4.697540283203125, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -11.04425048828125, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -0.734375, + "logps_train/rejected": -15.910186767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4513000547885895, + "rewards_train/margins": 1.0662811696529388, + "rewards_train/rejected": -1.5175812244415283, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.196895599365234, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -48.98121643066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5384395718574524, + "rewards_train/margins": 1.9971820712089539, + "rewards_train/rejected": -2.5356216430664062, + "step": 1638 + }, + { + "epoch": 0.46, + "logps_train/chosen": -4.964558124542236, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -48.581356048583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2620808184146881, + "rewards_train/margins": 1.7960549294948578, + "rewards_train/rejected": -2.058135747909546, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -122.84955596923828, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -105.11614990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5349557399749756, + "rewards_train/margins": 0.17665934562683105, + "rewards_train/rejected": -3.7116150856018066, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -192.15274047851562, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -179.96995544433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.915274143218994, + "rewards_train/margins": -2.418278694152832, + "rewards_train/rejected": -4.496995449066162, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -194.52554321289062, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -193.59182739257812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.652554512023926, + "rewards_train/margins": -0.09337186813354492, + "rewards_train/rejected": -6.559182643890381, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -28.22697639465332, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -28.062294006347656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23519764840602875, + "rewards_train/margins": -0.016468241810798645, + "rewards_train/rejected": -0.2187294065952301, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -17.150196075439453, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -33.0040283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35876962542533875, + "rewards_train/margins": 1.6228832304477692, + "rewards_train/rejected": -1.981652855873108, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -19.358226776123047, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -59.2176513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2858227491378784, + "rewards_train/margins": 3.9578176736831665, + "rewards_train/rejected": -5.243640422821045, + "step": 1639 + }, + { + "epoch": 0.46, + "logps_train/chosen": -8.497081756591797, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -13.174480438232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3497081696987152, + "rewards_train/margins": 0.5302399098873138, + "rewards_train/rejected": -0.879948079586029, + "step": 1639 + }, + { + "epoch": 0.46, + "learning_rate": 5.802143941738944e-07, + "loss": 0.6121, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -26.216266632080078, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -40.758544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2716267108917236, + "rewards_train/margins": 1.441727876663208, + "rewards_train/rejected": -2.7133545875549316, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -159.83836364746094, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -193.70416259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.583836555480957, + "rewards_train/margins": 6.386579513549805, + "rewards_train/rejected": -11.970416069030762, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -97.50299835205078, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -97.62831115722656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4002999067306519, + "rewards_train/margins": -0.3874688148498535, + "rewards_train/rejected": -1.0128310918807983, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -331.3382263183594, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -138.00885009765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -15.633822441101074, + "rewards_train/margins": -11.63293743133545, + "rewards_train/rejected": -4.000885009765625, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -134.2066650390625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -212.29449462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.020666599273682, + "rewards_train/margins": 4.60878324508667, + "rewards_train/rejected": -8.629449844360352, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -25.09356689453125, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -55.26004409790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4093566834926605, + "rewards_train/margins": 0.8166477978229523, + "rewards_train/rejected": -1.2260044813156128, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -22.469921112060547, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -14.376407623291016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9907421469688416, + "rewards_train/margins": -0.2843513488769531, + "rewards_train/rejected": -0.7063907980918884, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -141.212158203125, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -207.6312255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.421215772628784, + "rewards_train/margins": 2.7419068813323975, + "rewards_train/rejected": -6.163122653961182, + "step": 1640 + }, + { + "epoch": 0.46, + "logps_train/chosen": -197.95465087890625, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -136.64329528808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.395465135574341, + "rewards_train/margins": -1.7811355590820312, + "rewards_train/rejected": -1.6143295764923096, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -42.44794464111328, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -112.67159271240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.744794487953186, + "rewards_train/margins": 5.572364687919617, + "rewards_train/rejected": -7.317159175872803, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -83.72785949707031, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -83.71304321289062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2727859020233154, + "rewards_train/margins": -0.0014815330505371094, + "rewards_train/rejected": -2.2713043689727783, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -144.11146545410156, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -245.8726806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.461146593093872, + "rewards_train/margins": 8.826122045516968, + "rewards_train/rejected": -12.28726863861084, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -174.356689453125, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -152.99520874023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.535668849945068, + "rewards_train/margins": -2.5361480712890625, + "rewards_train/rejected": -4.999520778656006, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -112.29954528808594, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -115.76876831054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6299545168876648, + "rewards_train/margins": 0.09692233800888062, + "rewards_train/rejected": -0.7268768548965454, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -13.52479076385498, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -51.98657989501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4837290942668915, + "rewards_train/margins": 1.9774289429187775, + "rewards_train/rejected": -2.461158037185669, + "step": 1641 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.196377754211426, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -40.5312614440918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.363387793302536, + "rewards_train/margins": 1.6147383749485016, + "rewards_train/rejected": -1.9781261682510376, + "step": 1641 + }, + { + "epoch": 0.46, + "learning_rate": 5.778147016440527e-07, + "loss": 1.2651, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -191.1220245361328, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -213.6627960205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.41220235824585, + "rewards_train/margins": 3.254077434539795, + "rewards_train/rejected": -7.6662797927856445, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -131.2569580078125, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -124.13749694824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6256958246231079, + "rewards_train/margins": 2.088053822517395, + "rewards_train/rejected": -2.713749647140503, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -140.61685180664062, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -138.72947692871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.161685466766357, + "rewards_train/margins": 0.8112621307373047, + "rewards_train/rejected": -5.972947597503662, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -30.35099220275879, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -15.81916618347168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07259922474622726, + "rewards_train/margins": 0.3655673936009407, + "rewards_train/rejected": -0.43816661834716797, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -72.93517303466797, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -214.23388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7935173511505127, + "rewards_train/margins": 8.329871892929077, + "rewards_train/rejected": -10.12338924407959, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -81.42465209960938, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -254.11431884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6174652576446533, + "rewards_train/margins": 10.493966817855835, + "rewards_train/rejected": -13.111432075500488, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -106.75376892089844, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -212.47705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.475376844406128, + "rewards_train/margins": 6.572328805923462, + "rewards_train/rejected": -9.04770565032959, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -142.92152404785156, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -219.93264770507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.442152500152588, + "rewards_train/margins": 5.3511128425598145, + "rewards_train/rejected": -8.793265342712402, + "step": 1642 + }, + { + "epoch": 0.46, + "logps_train/chosen": -90.49388122558594, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -164.4379119873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5493881702423096, + "rewards_train/margins": 5.594403028488159, + "rewards_train/rejected": -8.143791198730469, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -145.56532287597656, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -143.12440490722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2065322399139404, + "rewards_train/margins": 1.9059083461761475, + "rewards_train/rejected": -4.112440586090088, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -197.99667358398438, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -165.9728240966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.899667263031006, + "rewards_train/margins": 0.09761524200439453, + "rewards_train/rejected": -5.9972825050354, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -105.61836242675781, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -68.131103515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.611836314201355, + "rewards_train/margins": -1.123725950717926, + "rewards_train/rejected": -0.48811036348342896, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -0.5159526467323303, + "logps_train/ref_chosen": -0.45703125, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -14.036908149719238, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005892139859497547, + "rewards_train/margins": 0.7946736989542842, + "rewards_train/rejected": -0.8005658388137817, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -254.6390380859375, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -291.34515380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.06390380859375, + "rewards_train/margins": 1.5706119537353516, + "rewards_train/rejected": -10.634515762329102, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -34.34977722167969, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -40.74900436401367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1115403175354004, + "rewards_train/margins": 0.15711021423339844, + "rewards_train/rejected": -3.268650531768799, + "step": 1643 + }, + { + "epoch": 0.46, + "logps_train/chosen": -7.540131568908691, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -1.7265625, + "logps_train/rejected": -25.073427200317383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004013156984001398, + "rewards_train/margins": 2.3306733607314527, + "rewards_train/rejected": -2.334686517715454, + "step": 1643 + }, + { + "epoch": 0.46, + "learning_rate": 5.75417963963681e-07, + "loss": 0.2823, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -114.74995422363281, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -188.08413696289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5249954462051392, + "rewards_train/margins": 6.733418822288513, + "rewards_train/rejected": -8.258414268493652, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -137.63284301757812, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -164.43687438964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.26328444480896, + "rewards_train/margins": 1.5304031372070312, + "rewards_train/rejected": -3.793687582015991, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -33.76215362548828, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -49.96847915649414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.113715410232544, + "rewards_train/margins": 1.8706326484680176, + "rewards_train/rejected": -2.9843480587005615, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -5.002165794372559, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -21.57061767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1470915824174881, + "rewards_train/margins": 1.3630952090024948, + "rewards_train/rejected": -1.510186791419983, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -119.04642486572266, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -204.54315185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2546424865722656, + "rewards_train/margins": 8.0996732711792, + "rewards_train/rejected": -10.354315757751465, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -113.33580017089844, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -111.36798858642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.633579969406128, + "rewards_train/margins": -0.19678115844726562, + "rewards_train/rejected": -2.4367988109588623, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -185.64048767089844, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -204.20689392089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.714049339294434, + "rewards_train/margins": 0.556640625, + "rewards_train/rejected": -9.270689964294434, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -14.342696189880371, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -20.47238540649414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.615519642829895, + "rewards_train/margins": 1.223906397819519, + "rewards_train/rejected": -1.839426040649414, + "step": 1644 + }, + { + "epoch": 0.46, + "logps_train/chosen": -91.72649383544922, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -118.90347290039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1726493835449219, + "rewards_train/margins": 2.6676979064941406, + "rewards_train/rejected": -3.8403472900390625, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -128.62496948242188, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -234.41387939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5124969482421875, + "rewards_train/margins": 8.828890800476074, + "rewards_train/rejected": -11.341387748718262, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -19.082143783569336, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -2.765625, + "logps_train/rejected": -11.609594345092773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4269644021987915, + "rewards_train/margins": -0.5425674319267273, + "rewards_train/rejected": -0.8843969702720642, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -29.732839584350586, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -22.902353286743164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6842215061187744, + "rewards_train/margins": -0.834611177444458, + "rewards_train/rejected": -1.8496103286743164, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -161.2828369140625, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -204.34457397460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.778283596038818, + "rewards_train/margins": 2.3561739921569824, + "rewards_train/rejected": -7.134457588195801, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -41.477787017822266, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -27.0960693359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.347778797149658, + "rewards_train/margins": -1.1506718397140503, + "rewards_train/rejected": -1.197106957435608, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -193.5750274658203, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -205.1208038330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0575027465820312, + "rewards_train/margins": 1.0545778274536133, + "rewards_train/rejected": -4.1120805740356445, + "step": 1645 + }, + { + "epoch": 0.46, + "logps_train/chosen": -196.70843505859375, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -277.37127685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.770843505859375, + "rewards_train/margins": 1.266284465789795, + "rewards_train/rejected": -7.03712797164917, + "step": 1645 + }, + { + "epoch": 0.46, + "learning_rate": 5.730241979074024e-07, + "loss": 0.4001, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -127.72149658203125, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -118.55728912353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7721498012542725, + "rewards_train/margins": 2.283579111099243, + "rewards_train/rejected": -5.055728912353516, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -48.11991882324219, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -82.89421844482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2619918584823608, + "rewards_train/margins": 1.852429986000061, + "rewards_train/rejected": -3.114421844482422, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -17.00704002380371, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -30.643962860107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8444539904594421, + "rewards_train/margins": 1.0949422717094421, + "rewards_train/rejected": -1.9393962621688843, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -156.9190673828125, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -163.78854370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2919068336486816, + "rewards_train/margins": 1.6369476318359375, + "rewards_train/rejected": -4.928854465484619, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -31.124465942382812, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -24.15944480895996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2624466121196747, + "rewards_train/margins": -0.009002119302749634, + "rewards_train/rejected": -0.25344449281692505, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -28.243427276611328, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -34.94991683959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8493427634239197, + "rewards_train/margins": 2.2300239205360413, + "rewards_train/rejected": -3.079366683959961, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -14.246256828308105, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -26.75, + "logps_train/rejected": -33.95963668823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.462125688791275, + "rewards_train/margins": 0.2588379681110382, + "rewards_train/rejected": -0.7209636569023132, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -54.748924255371094, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -39.985633850097656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6748924255371094, + "rewards_train/margins": -0.2388288974761963, + "rewards_train/rejected": -2.436063528060913, + "step": 1646 + }, + { + "epoch": 0.46, + "logps_train/chosen": -94.64200592041016, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -110.29069519042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014200592413544655, + "rewards_train/margins": 0.514868950471282, + "rewards_train/rejected": -0.5290695428848267, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -7.146955490112305, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -23.161895751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.355320543050766, + "rewards_train/margins": 0.14836904406547546, + "rewards_train/rejected": -0.5036895871162415, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -147.5972442626953, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -168.85198974609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.859724521636963, + "rewards_train/margins": -0.12452554702758789, + "rewards_train/rejected": -4.735198974609375, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -187.76136779785156, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -217.39149475097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9761369228363037, + "rewards_train/margins": 0.5630128383636475, + "rewards_train/rejected": -4.539149761199951, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -40.488067626953125, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -44.12407684326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.70505690574646, + "rewards_train/margins": 1.0917258262634277, + "rewards_train/rejected": -3.7967827320098877, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -5.045605182647705, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -13.037216186523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3490917682647705, + "rewards_train/margins": 0.43275487422943115, + "rewards_train/rejected": -0.7818466424942017, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -75.62032318115234, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -76.05229187011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6620323061943054, + "rewards_train/margins": 0.043196916580200195, + "rewards_train/rejected": -0.7052292227745056, + "step": 1647 + }, + { + "epoch": 0.46, + "logps_train/chosen": -31.09282112121582, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -41.87516784667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2717821598052979, + "rewards_train/margins": 0.653234601020813, + "rewards_train/rejected": -1.9250167608261108, + "step": 1647 + }, + { + "epoch": 0.46, + "learning_rate": 5.706334202290416e-07, + "loss": 0.4423, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -29.50994110107422, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -29.51590347290039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.479119062423706, + "rewards_train/margins": 0.16309642791748047, + "rewards_train/rejected": -2.6422154903411865, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -21.5856990814209, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -28.104883193969727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5710699558258057, + "rewards_train/margins": -0.12308156490325928, + "rewards_train/rejected": -1.4479883909225464, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -218.55081176757812, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -258.7784423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.055081367492676, + "rewards_train/margins": 8.572763442993164, + "rewards_train/rejected": -14.62784481048584, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -26.287317276000977, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -35.43463897705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5537317991256714, + "rewards_train/margins": 0.9147320985794067, + "rewards_train/rejected": -2.468463897705078, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -22.47602653503418, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -29.466880798339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5851026773452759, + "rewards_train/margins": 1.0803353786468506, + "rewards_train/rejected": -1.6654380559921265, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -29.081663131713867, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -39.28921890258789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.467541456222534, + "rewards_train/margins": 0.8832554817199707, + "rewards_train/rejected": -3.350796937942505, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -56.614749908447266, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -70.72080993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.011474991217255592, + "rewards_train/margins": 0.810606038197875, + "rewards_train/rejected": -0.8220810294151306, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -72.94007110595703, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -59.28131103515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1690070629119873, + "rewards_train/margins": -0.7158759832382202, + "rewards_train/rejected": -1.453131079673767, + "step": 1648 + }, + { + "epoch": 0.46, + "logps_train/chosen": -34.79742431640625, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -50.52045440673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1297425031661987, + "rewards_train/margins": 2.3723028898239136, + "rewards_train/rejected": -3.5020453929901123, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -132.01370239257812, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -147.68663024902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1513702869415283, + "rewards_train/margins": 2.0672926902770996, + "rewards_train/rejected": -3.218662977218628, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -187.60475158691406, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -172.26849365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2604751586914062, + "rewards_train/margins": 3.716374397277832, + "rewards_train/rejected": -6.976849555969238, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -116.6304931640625, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -103.3889389038086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4130494594573975, + "rewards_train/margins": 2.175844430923462, + "rewards_train/rejected": -4.588893890380859, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -70.41596984863281, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -69.09962463378906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.891597032546997, + "rewards_train/margins": -0.9816346168518066, + "rewards_train/rejected": -2.9099624156951904, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -68.71975708007812, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -50.93913269042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3719756603240967, + "rewards_train/margins": 1.7906877994537354, + "rewards_train/rejected": -4.162663459777832, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -118.77249145507812, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -170.82965087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9772491455078125, + "rewards_train/margins": 2.9057159423828125, + "rewards_train/rejected": -3.882965087890625, + "step": 1649 + }, + { + "epoch": 0.46, + "logps_train/chosen": -76.92427825927734, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -90.7593765258789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.142427921295166, + "rewards_train/margins": 1.4085097312927246, + "rewards_train/rejected": -4.550937652587891, + "step": 1649 + }, + { + "epoch": 0.46, + "learning_rate": 5.682456476615072e-07, + "loss": 0.3684, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -84.035400390625, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -128.45347595214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.953540027141571, + "rewards_train/margins": 1.3918076157569885, + "rewards_train/rejected": -2.3453476428985596, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -47.576202392578125, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -52.83174133300781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3576202392578125, + "rewards_train/margins": -0.17444610595703125, + "rewards_train/rejected": -1.1831741333007812, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -2.6676554679870605, + "logps_train/ref_chosen": -1.5234375, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -3.821422576904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1144217997789383, + "rewards_train/margins": 0.09584546089172363, + "rewards_train/rejected": -0.21026726067066193, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -5.64427375793457, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -35.001678466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24255238473415375, + "rewards_train/margins": 1.4201154857873917, + "rewards_train/rejected": -1.6626678705215454, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -209.9884033203125, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -326.41448974609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2988404035568237, + "rewards_train/margins": 13.442608952522278, + "rewards_train/rejected": -14.741449356079102, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -10.236120223999023, + "logps_train/ref_chosen": -2.546875, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -47.53958511352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7689245343208313, + "rewards_train/margins": 3.235034167766571, + "rewards_train/rejected": -4.003958702087402, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.12750244140625, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -31.051774978637695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.970562756061554, + "rewards_train/margins": 0.2846147418022156, + "rewards_train/rejected": -1.2551774978637695, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -94.36077117919922, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -122.82960510253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7860772609710693, + "rewards_train/margins": 0.14688324928283691, + "rewards_train/rejected": -2.9329605102539062, + "step": 1650 + }, + { + "epoch": 0.46, + "logps_train/chosen": -3.248490810394287, + "logps_train/ref_chosen": -1.625, + "logps_train/ref_rejected": -0.51953125, + "logps_train/rejected": -5.360431671142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16234908998012543, + "rewards_train/margins": 0.3217409700155258, + "rewards_train/rejected": -0.48409005999565125, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -149.32977294921875, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -113.09782409667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.732977271080017, + "rewards_train/margins": 0.3768051862716675, + "rewards_train/rejected": -2.1097824573516846, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -32.592552185058594, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -57.27785110473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2280051708221436, + "rewards_train/margins": 2.762280225753784, + "rewards_train/rejected": -4.990285396575928, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -98.2939453125, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -84.19464874267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.679394543170929, + "rewards_train/margins": 1.0900703072547913, + "rewards_train/rejected": -1.7694648504257202, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -17.670442581176758, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -86.03863525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1889193058013916, + "rewards_train/margins": 2.9649441242218018, + "rewards_train/rejected": -4.153863430023193, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -27.935802459716797, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -11.603109359741211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0435802936553955, + "rewards_train/margins": -1.0910818576812744, + "rewards_train/rejected": -0.9524984359741211, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -95.88739013671875, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -114.16610717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28873902559280396, + "rewards_train/margins": 0.07787171006202698, + "rewards_train/rejected": -0.36661073565483093, + "step": 1651 + }, + { + "epoch": 0.46, + "logps_train/chosen": -23.30599594116211, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -2.421875, + "logps_train/rejected": -17.78000259399414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9118496179580688, + "rewards_train/margins": -0.3760368824005127, + "rewards_train/rejected": -1.5358127355575562, + "step": 1651 + }, + { + "epoch": 0.46, + "learning_rate": 5.658608969166754e-07, + "loss": 0.4684, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -121.18325805664062, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -169.0618133544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.418325901031494, + "rewards_train/margins": 2.6378555297851562, + "rewards_train/rejected": -5.05618143081665, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -26.586729049682617, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -37.30069351196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6274229288101196, + "rewards_train/margins": 0.3776465654373169, + "rewards_train/rejected": -2.0050694942474365, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -15.645191192626953, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -23.666606903076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6770191192626953, + "rewards_train/margins": 0.14589160680770874, + "rewards_train/rejected": -0.822910726070404, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -83.6854248046875, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -198.93475341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.518542468547821, + "rewards_train/margins": 2.7749329209327698, + "rewards_train/rejected": -3.293475389480591, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -96.34846496582031, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -177.68478393554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3848465085029602, + "rewards_train/margins": 5.983631789684296, + "rewards_train/rejected": -6.368478298187256, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -24.830398559570312, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -49.85209655761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7205398678779602, + "rewards_train/margins": 1.8021697402000427, + "rewards_train/rejected": -2.522709608078003, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -115.38566589355469, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -111.56739807128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08856659382581711, + "rewards_train/margins": 0.16817323118448257, + "rewards_train/rejected": -0.2567398250102997, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -111.515625, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -162.9431915283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1015625, + "rewards_train/margins": 4.692756652832031, + "rewards_train/rejected": -5.794319152832031, + "step": 1652 + }, + { + "epoch": 0.46, + "logps_train/chosen": -59.98179626464844, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -52.37422180175781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3106796741485596, + "rewards_train/margins": -0.12325739860534668, + "rewards_train/rejected": -3.187422275543213, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -175.7259521484375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -178.67648315429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.922595500946045, + "rewards_train/margins": 0.09505271911621094, + "rewards_train/rejected": -6.017648220062256, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -30.613201141357422, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -47.97065734863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6675701141357422, + "rewards_train/margins": 0.3419957160949707, + "rewards_train/rejected": -2.009565830230713, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -16.61029052734375, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -0.6484375, + "logps_train/rejected": -6.242392539978027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32352906465530396, + "rewards_train/margins": 0.23586642742156982, + "rewards_train/rejected": -0.5593954920768738, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -82.60438537597656, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -152.42884826660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.33956146240234375, + "rewards_train/margins": 4.032446384429932, + "rewards_train/rejected": -3.692884922027588, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -63.30250549316406, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -142.84835815429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2697494626045227, + "rewards_train/margins": 5.704585373401642, + "rewards_train/rejected": -5.434835910797119, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.914170265197754, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -27.327524185180664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9195420145988464, + "rewards_train/margins": 0.5757104754447937, + "rewards_train/rejected": -1.4952524900436401, + "step": 1653 + }, + { + "epoch": 0.46, + "logps_train/chosen": -86.5203628540039, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -150.92295837402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3020362854003906, + "rewards_train/margins": 3.6902594566345215, + "rewards_train/rejected": -4.992295742034912, + "step": 1653 + }, + { + "epoch": 0.46, + "learning_rate": 5.63479184685273e-07, + "loss": 0.3165, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -0.7928546667098999, + "logps_train/ref_chosen": -0.34375, + "logps_train/ref_rejected": -0.34375, + "logps_train/rejected": -0.7881897687911987, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04491046816110611, + "rewards_train/margins": -0.00046649202704429626, + "rewards_train/rejected": -0.04444397613406181, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -41.75193786621094, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -57.951805114746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0501937866210938, + "rewards_train/margins": 0.46998679637908936, + "rewards_train/rejected": -1.520180583000183, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -33.03844451904297, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -37.45075988769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.697594404220581, + "rewards_train/margins": 0.6537315845489502, + "rewards_train/rejected": -3.3513259887695312, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -0.2695826590061188, + "logps_train/ref_chosen": -0.48046875, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -6.020895004272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.021088609471917152, + "rewards_train/margins": 0.12630310840904713, + "rewards_train/rejected": -0.10521449893712997, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -149.80494689941406, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -181.4742431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.480494737625122, + "rewards_train/margins": 5.616929769515991, + "rewards_train/rejected": -8.097424507141113, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -119.358642578125, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -111.5636978149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1358642578125, + "rewards_train/margins": 1.270505666732788, + "rewards_train/rejected": -3.406369924545288, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -146.63681030273438, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -215.93878173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.063681125640869, + "rewards_train/margins": 0.8301973342895508, + "rewards_train/rejected": -4.89387845993042, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -180.08370971679688, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -195.1179656982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.758370876312256, + "rewards_train/margins": 0.45342588424682617, + "rewards_train/rejected": -8.211796760559082, + "step": 1654 + }, + { + "epoch": 0.46, + "logps_train/chosen": -114.65735626220703, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -253.45872497558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.015735626220703, + "rewards_train/margins": 5.230136871337891, + "rewards_train/rejected": -8.245872497558594, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -56.931453704833984, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -46.17384338378906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.474395275115967, + "rewards_train/margins": -1.419510841369629, + "rewards_train/rejected": -3.054884433746338, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -125.28279113769531, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -207.11643981933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0782792568206787, + "rewards_train/margins": 6.133364915847778, + "rewards_train/rejected": -9.211644172668457, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -36.73570251464844, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -33.16108703613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9985703229904175, + "rewards_train/margins": 0.523788332939148, + "rewards_train/rejected": -2.5223586559295654, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -198.794677734375, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -165.12576293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.779468059539795, + "rewards_train/margins": 0.2331085205078125, + "rewards_train/rejected": -6.012576580047607, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -115.84452056884766, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -106.19471740722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.73445200920105, + "rewards_train/margins": 1.3850200176239014, + "rewards_train/rejected": -4.119472026824951, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -14.15439224243164, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -33.93516540527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8591892123222351, + "rewards_train/margins": 1.884327471256256, + "rewards_train/rejected": -2.743516683578491, + "step": 1655 + }, + { + "epoch": 0.46, + "logps_train/chosen": -31.506916046142578, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -44.106903076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3631916046142578, + "rewards_train/margins": 1.1724987030029297, + "rewards_train/rejected": -2.5356903076171875, + "step": 1655 + }, + { + "epoch": 0.46, + "learning_rate": 5.611005276367604e-07, + "loss": 0.4164, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -107.10221099853516, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -144.78326416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6602210998535156, + "rewards_train/margins": 2.168105363845825, + "rewards_train/rejected": -3.828326463699341, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -14.35501480102539, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -35.2149772644043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.051126480102539, + "rewards_train/margins": 1.820371389389038, + "rewards_train/rejected": -2.871497869491577, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -169.74386596679688, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -255.0, + "logps_train/rejected": -308.3673095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8743865489959717, + "rewards_train/margins": 2.4623444080352783, + "rewards_train/rejected": -5.33673095703125, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -134.5858612060547, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -134.15243530273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2085860967636108, + "rewards_train/margins": 2.2566574811935425, + "rewards_train/rejected": -3.4652435779571533, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -31.698989868164062, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -32.33164596557617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.448024034500122, + "rewards_train/margins": 0.4367032051086426, + "rewards_train/rejected": -2.8847272396087646, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -121.04100036621094, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -197.0, + "logps_train/rejected": -345.19110107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.154100179672241, + "rewards_train/margins": 12.66500973701477, + "rewards_train/rejected": -14.819109916687012, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -98.92117309570312, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -181.8813934326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5078827142715454, + "rewards_train/margins": 1.6960220336914062, + "rewards_train/rejected": -1.1881393194198608, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -129.24749755859375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -173.24383544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4747498035430908, + "rewards_train/margins": 5.249634027481079, + "rewards_train/rejected": -6.72438383102417, + "step": 1656 + }, + { + "epoch": 0.46, + "logps_train/chosen": -1.286614179611206, + "logps_train/ref_chosen": -1.4765625, + "logps_train/ref_rejected": -3.609375, + "logps_train/rejected": -3.8625073432922363, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018994832411408424, + "rewards_train/margins": 0.04430806636810303, + "rewards_train/rejected": -0.025313233956694603, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -66.5497817993164, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -58.24017333984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.104978322982788, + "rewards_train/margins": -1.7809609770774841, + "rewards_train/rejected": -0.32401734590530396, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -99.82514190673828, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -131.89138793945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4825141429901123, + "rewards_train/margins": 1.256624698638916, + "rewards_train/rejected": -3.7391388416290283, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -169.40237426757812, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -246.76531982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5402374267578125, + "rewards_train/margins": 3.6362948417663574, + "rewards_train/rejected": -4.17653226852417, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -125.31672668457031, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -204.50694274902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1316726207733154, + "rewards_train/margins": 5.019022226333618, + "rewards_train/rejected": -8.150694847106934, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -161.89801025390625, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -168.627685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.039801120758057, + "rewards_train/margins": 2.322967529296875, + "rewards_train/rejected": -6.362768650054932, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -36.05013656616211, + "logps_train/ref_chosen": -1.859375, + "logps_train/ref_rejected": -1.859375, + "logps_train/rejected": -35.223121643066406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4190762042999268, + "rewards_train/margins": -0.08270144462585449, + "rewards_train/rejected": -3.3363747596740723, + "step": 1657 + }, + { + "epoch": 0.46, + "logps_train/chosen": -19.58668327331543, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -50.06953430175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14133167266845703, + "rewards_train/margins": 3.2607851028442383, + "rewards_train/rejected": -3.1194534301757812, + "step": 1657 + }, + { + "epoch": 0.46, + "learning_rate": 5.587249424192158e-07, + "loss": 0.3043, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -100.63862609863281, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -181.91629028320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3638626039028168, + "rewards_train/margins": 3.327766567468643, + "rewards_train/rejected": -3.69162917137146, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -25.691654205322266, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -30.563457489013672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7191654443740845, + "rewards_train/margins": -0.937819704413414, + "rewards_train/rejected": 0.21865426003932953, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -15.997591018676758, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -20.593917846679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1841341257095337, + "rewards_train/margins": 0.5049451589584351, + "rewards_train/rejected": -1.6890792846679688, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -31.701736450195312, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -35.18861389160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6951736211776733, + "rewards_train/margins": 0.08618783950805664, + "rewards_train/rejected": -1.78136146068573, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.405599594116211, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -47.98322677612305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6499349474906921, + "rewards_train/margins": 2.5358877778053284, + "rewards_train/rejected": -3.1858227252960205, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -118.35887145996094, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -173.18875122070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9858871698379517, + "rewards_train/margins": 1.2329879999160767, + "rewards_train/rejected": -3.2188751697540283, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -203.26339721679688, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -238.06057739257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.426339864730835, + "rewards_train/margins": 3.979717969894409, + "rewards_train/rejected": -6.406057834625244, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -207.6896209716797, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -110.16202545166016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7689621448516846, + "rewards_train/margins": -1.9527596235275269, + "rewards_train/rejected": -1.8162025213241577, + "step": 1658 + }, + { + "epoch": 0.46, + "logps_train/chosen": -110.63850402832031, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -213.17437744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9138504266738892, + "rewards_train/margins": 6.303587317466736, + "rewards_train/rejected": -8.217437744140625, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -12.091654777526855, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -39.00214385986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24083451926708221, + "rewards_train/margins": 1.7785488814115524, + "rewards_train/rejected": -1.5377143621444702, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -124.27750396728516, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -163.47506713867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1777503490448, + "rewards_train/margins": 0.8197565078735352, + "rewards_train/rejected": -3.997506856918335, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -40.76957702636719, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -38.0233154296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.489457845687866, + "rewards_train/margins": -0.3996262550354004, + "rewards_train/rejected": -2.089831590652466, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -124.33192443847656, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -149.68283081054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6331924200057983, + "rewards_train/margins": 3.7850905656814575, + "rewards_train/rejected": -5.418282985687256, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -58.158164978027344, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -72.2973861694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5908164978027344, + "rewards_train/margins": 3.238922119140625, + "rewards_train/rejected": -4.829738616943359, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -42.04290008544922, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -33.023948669433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8792900443077087, + "rewards_train/margins": 1.7387297749519348, + "rewards_train/rejected": -2.6180198192596436, + "step": 1659 + }, + { + "epoch": 0.46, + "logps_train/chosen": -41.28236770629883, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -21.456819534301758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0157368183135986, + "rewards_train/margins": -0.17317986488342285, + "rewards_train/rejected": -1.8425569534301758, + "step": 1659 + }, + { + "epoch": 0.46, + "learning_rate": 5.563524456592163e-07, + "loss": 0.4565, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -145.83811950683594, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -201.885498046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.633811950683594, + "rewards_train/margins": 2.0047378540039062, + "rewards_train/rejected": -8.6385498046875, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -4.937295436859131, + "logps_train/ref_chosen": -0.88671875, + "logps_train/ref_rejected": -1.375, + "logps_train/rejected": -4.449901103973389, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4050576686859131, + "rewards_train/margins": -0.09756755828857422, + "rewards_train/rejected": -0.30749011039733887, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -85.38372802734375, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -232.68719482421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.738372802734375, + "rewards_train/margins": 7.7303466796875, + "rewards_train/rejected": -8.468719482421875, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -25.45792579650879, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -43.96503448486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.320792555809021, + "rewards_train/margins": 2.025710940361023, + "rewards_train/rejected": -3.346503496170044, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -14.860189437866211, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -26.033287048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1889810562133789, + "rewards_train/margins": 1.3235597610473633, + "rewards_train/rejected": -1.1345787048339844, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -182.379638671875, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -259.7445068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.837963819503784, + "rewards_train/margins": 7.2364866733551025, + "rewards_train/rejected": -11.074450492858887, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -162.413818359375, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -243.69845581054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0413818359375, + "rewards_train/margins": 7.328463554382324, + "rewards_train/rejected": -9.369845390319824, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -225.1692352294922, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -273.551025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.516923427581787, + "rewards_train/margins": 1.0381789207458496, + "rewards_train/rejected": -8.555102348327637, + "step": 1660 + }, + { + "epoch": 0.46, + "logps_train/chosen": -61.4420280456543, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -50.953147888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1942027807235718, + "rewards_train/margins": 1.5386120080947876, + "rewards_train/rejected": -2.7328147888183594, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -9.471195220947266, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -0.85546875, + "logps_train/rejected": -8.693617820739746, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5627445578575134, + "rewards_train/margins": 0.22107034921646118, + "rewards_train/rejected": -0.7838149070739746, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -222.88265991210938, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -245.72189331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.588265895843506, + "rewards_train/margins": 0.6839237213134766, + "rewards_train/rejected": -7.272189617156982, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -5.9926018714904785, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -16.923704147338867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32113519310951233, + "rewards_train/margins": 0.5524852573871613, + "rewards_train/rejected": -0.8736204504966736, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -49.05910873413086, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -29.858230590820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.530910849571228, + "rewards_train/margins": 1.004912257194519, + "rewards_train/rejected": -2.535823106765747, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -46.367313385009766, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -41.599090576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8117313385009766, + "rewards_train/margins": 2.423177719116211, + "rewards_train/rejected": -3.2349090576171875, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -3.2044734954833984, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -18.821517944335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15560360252857208, + "rewards_train/margins": 1.3812357634305954, + "rewards_train/rejected": -1.5368393659591675, + "step": 1661 + }, + { + "epoch": 0.46, + "logps_train/chosen": -98.79956817626953, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -163.64817810058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.979956865310669, + "rewards_train/margins": 4.734861135482788, + "rewards_train/rejected": -6.714818000793457, + "step": 1661 + }, + { + "epoch": 0.46, + "learning_rate": 5.539830539617234e-07, + "loss": 0.2382, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -10.516044616699219, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -19.190067291259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25464555621147156, + "rewards_train/margins": 0.8549022972583771, + "rewards_train/rejected": -0.6002567410469055, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -96.2351303100586, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -182.9380340576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9235130548477173, + "rewards_train/margins": 5.320290446281433, + "rewards_train/rejected": -7.24380350112915, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -152.80657958984375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -242.5552978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.780658006668091, + "rewards_train/margins": 6.174872159957886, + "rewards_train/rejected": -8.955530166625977, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -169.31646728515625, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -189.97177124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.23164701461792, + "rewards_train/margins": 3.115530014038086, + "rewards_train/rejected": -7.347177028656006, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -6.217621803283691, + "logps_train/ref_chosen": -1.4609375, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -23.249366760253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.47566843032836914, + "rewards_train/margins": -0.163231760263443, + "rewards_train/rejected": -0.31243667006492615, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -96.62254333496094, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -26.925880432128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.337254524230957, + "rewards_train/margins": -4.382166504859924, + "rewards_train/rejected": -1.9550880193710327, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -43.54228210449219, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -55.32832717895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.129228353500366, + "rewards_train/margins": 2.4598543643951416, + "rewards_train/rejected": -4.589082717895508, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -35.981658935546875, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -75.7311782836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6606659293174744, + "rewards_train/margins": 3.3124518990516663, + "rewards_train/rejected": -3.9731178283691406, + "step": 1662 + }, + { + "epoch": 0.46, + "logps_train/chosen": -139.3744354248047, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -251.1240692138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.587443828582764, + "rewards_train/margins": 3.824963092803955, + "rewards_train/rejected": -9.412406921386719, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -70.16691589355469, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -93.02674865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.479191780090332, + "rewards_train/margins": 1.7109832763671875, + "rewards_train/rejected": -6.1901750564575195, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -3.579495906829834, + "logps_train/ref_chosen": -1.1171875, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -16.321592330932617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2462308406829834, + "rewards_train/margins": 0.029678404331207275, + "rewards_train/rejected": -0.2759092450141907, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -25.85872459411621, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -37.48280334472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5546225309371948, + "rewards_train/margins": 1.4749077558517456, + "rewards_train/rejected": -3.0295302867889404, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -7.074056625366211, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -12.866674423217773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3542806804180145, + "rewards_train/margins": -0.2738632336258888, + "rewards_train/rejected": -0.0804174467921257, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -66.23456573486328, + "logps_train/ref_chosen": -47.25, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -13.813518524169922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8984565734863281, + "rewards_train/margins": -0.8155422210693359, + "rewards_train/rejected": -1.0829143524169922, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -29.682941436767578, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -140.66104125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8807941675186157, + "rewards_train/margins": 3.7353099584579468, + "rewards_train/rejected": -4.6161041259765625, + "step": 1663 + }, + { + "epoch": 0.46, + "logps_train/chosen": -37.72193908691406, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -29.875, + "logps_train/rejected": -47.51560592651367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5721939206123352, + "rewards_train/margins": 1.19186669588089, + "rewards_train/rejected": -1.764060616493225, + "step": 1663 + }, + { + "epoch": 0.47, + "learning_rate": 5.51616783909968e-07, + "loss": 0.5671, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -85.94457244873047, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -139.67210388183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4944572448730469, + "rewards_train/margins": 2.7227530479431152, + "rewards_train/rejected": -4.217210292816162, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -80.83577728271484, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -54.5, + "logps_train/rejected": -86.23258972167969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4335777759552, + "rewards_train/margins": -0.2603187561035156, + "rewards_train/rejected": -3.1732590198516846, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -74.47711944580078, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -47.18646240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.685212135314941, + "rewards_train/margins": -0.6540656089782715, + "rewards_train/rejected": -4.03114652633667, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -54.52041244506836, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -60.25127029418945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8270412683486938, + "rewards_train/margins": 1.2980858087539673, + "rewards_train/rejected": -2.125127077102661, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -233.73204040527344, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -225.22433471679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.973204612731934, + "rewards_train/margins": 0.2992286682128906, + "rewards_train/rejected": -10.272433280944824, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -16.25621795654297, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -21.6962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3756217956542969, + "rewards_train/margins": 0.4408820867538452, + "rewards_train/rejected": -1.816503882408142, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -129.05889892578125, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -210.92613220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.755889892578125, + "rewards_train/margins": 6.736723899841309, + "rewards_train/rejected": -10.492613792419434, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -28.112571716308594, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.46875, + "logps_train/rejected": -25.384485244750977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.286257266998291, + "rewards_train/margins": -0.29468369483947754, + "rewards_train/rejected": -1.9915735721588135, + "step": 1664 + }, + { + "epoch": 0.47, + "logps_train/chosen": -21.348278045654297, + "logps_train/ref_chosen": -1.453125, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -33.713863372802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9895153045654297, + "rewards_train/margins": 1.0646836757659912, + "rewards_train/rejected": -3.054198980331421, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -65.19779205322266, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -94.39384460449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8197792172431946, + "rewards_train/margins": 1.4696053862571716, + "rewards_train/rejected": -2.289384603500366, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -92.26692962646484, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -179.58168029785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7766929864883423, + "rewards_train/margins": 7.831475615501404, + "rewards_train/rejected": -8.608168601989746, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -134.0353240966797, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -152.91064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.553532361984253, + "rewards_train/margins": 1.6375319957733154, + "rewards_train/rejected": -5.191064357757568, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -44.39678955078125, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -79.99590301513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.127178907394409, + "rewards_train/margins": 3.134911298751831, + "rewards_train/rejected": -5.26209020614624, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -150.70584106445312, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -193.31515502929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.120584011077881, + "rewards_train/margins": 0.01093149185180664, + "rewards_train/rejected": -6.1315155029296875, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -119.94081115722656, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -160.33282470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4940811395645142, + "rewards_train/margins": 2.689201235771179, + "rewards_train/rejected": -4.183282375335693, + "step": 1665 + }, + { + "epoch": 0.47, + "logps_train/chosen": -120.34892272949219, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -197.00880432128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4848923683166504, + "rewards_train/margins": 2.415987968444824, + "rewards_train/rejected": -4.900880336761475, + "step": 1665 + }, + { + "epoch": 0.47, + "learning_rate": 5.492536520653307e-07, + "loss": 0.3548, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -12.298563003540039, + "logps_train/ref_chosen": -1.703125, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -105.89501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0595438480377197, + "rewards_train/margins": 2.379958152770996, + "rewards_train/rejected": -3.439502000808716, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -122.7174072265625, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -47.81707000732422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.97174072265625, + "rewards_train/margins": -1.1900336742401123, + "rewards_train/rejected": -1.7817070484161377, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -113.56627655029297, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -91.35566711425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6566276550292969, + "rewards_train/margins": 0.2789391279220581, + "rewards_train/rejected": -1.935566782951355, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -95.83466339111328, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -129.1947784423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.883466362953186, + "rewards_train/margins": 2.336011528968811, + "rewards_train/rejected": -3.219477891921997, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -101.91553497314453, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -216.48426818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3415535688400269, + "rewards_train/margins": 11.206873059272766, + "rewards_train/rejected": -12.548426628112793, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -234.25567626953125, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -232.79617309570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.225567817687988, + "rewards_train/margins": 2.954050064086914, + "rewards_train/rejected": -12.179617881774902, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -170.884765625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -208.04824829101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.4384765625, + "rewards_train/margins": -0.3336515426635742, + "rewards_train/rejected": -7.104825019836426, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -82.39655303955078, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -82.73516082763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2396553754806519, + "rewards_train/margins": 0.03386068344116211, + "rewards_train/rejected": -1.273516058921814, + "step": 1666 + }, + { + "epoch": 0.47, + "logps_train/chosen": -9.783914566040039, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -12.607154846191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29089146852493286, + "rewards_train/margins": 0.7151365876197815, + "rewards_train/rejected": -1.0060280561447144, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -167.48123168945312, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -207.4403076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9481231570243835, + "rewards_train/margins": 6.095907509326935, + "rewards_train/rejected": -7.044030666351318, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -112.1965560913086, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -116.86842346191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.269655704498291, + "rewards_train/margins": -0.23281335830688477, + "rewards_train/rejected": -2.0368423461914062, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -172.50534057617188, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -267.19085693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.700534343719482, + "rewards_train/margins": 6.618551731109619, + "rewards_train/rejected": -11.319086074829102, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -15.603116035461426, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -19.286935806274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1321866512298584, + "rewards_train/margins": 0.18088197708129883, + "rewards_train/rejected": -1.3130686283111572, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -87.05986022949219, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -141.57632446289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1059861183166504, + "rewards_train/margins": 0.4516463279724121, + "rewards_train/rejected": -2.5576324462890625, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -11.827828407287598, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -13.153156280517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5546578764915466, + "rewards_train/margins": 0.12003278732299805, + "rewards_train/rejected": -0.6746906638145447, + "step": 1667 + }, + { + "epoch": 0.47, + "logps_train/chosen": -19.15397834777832, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -16.143455505371094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0091478824615479, + "rewards_train/margins": -0.2135522961616516, + "rewards_train/rejected": -0.7955955862998962, + "step": 1667 + }, + { + "epoch": 0.47, + "learning_rate": 5.468936749672291e-07, + "loss": 0.4726, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -102.83685302734375, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -114.35829162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.233685255050659, + "rewards_train/margins": 0.9521439075469971, + "rewards_train/rejected": -4.185829162597656, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -163.60096740722656, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -198.949462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5600967407226562, + "rewards_train/margins": 3.8348498344421387, + "rewards_train/rejected": -5.394946575164795, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -31.444835662841797, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -41.93761444091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.100733518600464, + "rewards_train/margins": 1.5899028778076172, + "rewards_train/rejected": -3.690636396408081, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -24.774394989013672, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -28.271207809448242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0711896419525146, + "rewards_train/margins": 0.12155628204345703, + "rewards_train/rejected": -2.1927459239959717, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -168.94081115722656, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -167.1792449951172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.544081211090088, + "rewards_train/margins": -0.3761568069458008, + "rewards_train/rejected": -6.167924404144287, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -151.95599365234375, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -144.97671508789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.695599555969238, + "rewards_train/margins": -0.14792776107788086, + "rewards_train/rejected": -4.547671794891357, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -24.334213256835938, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -39.80783462524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9959213137626648, + "rewards_train/margins": 1.8473621010780334, + "rewards_train/rejected": -2.8432834148406982, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -15.941854476928711, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -16.126070022583008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48793545365333557, + "rewards_train/margins": 0.14967158436775208, + "rewards_train/rejected": -0.6376070380210876, + "step": 1668 + }, + { + "epoch": 0.47, + "logps_train/chosen": -67.79660034179688, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -59.73652267456055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.004660129547119, + "rewards_train/margins": 0.7439920902252197, + "rewards_train/rejected": -2.748652219772339, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -153.40521240234375, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -112.84913635253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.140521287918091, + "rewards_train/margins": 1.5943922996520996, + "rewards_train/rejected": -3.7349135875701904, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -76.85247802734375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -175.9168243408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.464752197265625, + "rewards_train/margins": 8.806434631347656, + "rewards_train/rejected": -8.341682434082031, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -214.55702209472656, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -266.0, + "logps_train/rejected": -373.82061767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6557023525238037, + "rewards_train/margins": 7.126359224319458, + "rewards_train/rejected": -10.782061576843262, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -23.713937759399414, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -8.06440258026123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.066706418991089, + "rewards_train/margins": -1.5555786490440369, + "rewards_train/rejected": -0.511127769947052, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -20.838701248168945, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -45.761146545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1276201009750366, + "rewards_train/margins": 1.8484946489334106, + "rewards_train/rejected": -2.9761147499084473, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -189.62332153320312, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -185.75607299804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.612332344055176, + "rewards_train/margins": 2.1132755279541016, + "rewards_train/rejected": -8.725607872009277, + "step": 1669 + }, + { + "epoch": 0.47, + "logps_train/chosen": -184.7439727783203, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -150.54901123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.374397277832031, + "rewards_train/margins": 1.4305038452148438, + "rewards_train/rejected": -5.804901123046875, + "step": 1669 + }, + { + "epoch": 0.47, + "learning_rate": 5.445368691330006e-07, + "loss": 0.4, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -19.462478637695312, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -53.27817916870117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5712478756904602, + "rewards_train/margins": 0.8065701127052307, + "rewards_train/rejected": -1.377817988395691, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -7.810180187225342, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -31.906173706054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2935180366039276, + "rewards_train/margins": 1.8970992863178253, + "rewards_train/rejected": -2.190617322921753, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -188.19900512695312, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -306.4169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.519900798797607, + "rewards_train/margins": 8.92179822921753, + "rewards_train/rejected": -13.441699028015137, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -16.675012588500977, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -1.734375, + "logps_train/rejected": -40.066402435302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8237512707710266, + "rewards_train/margins": 3.0094515681266785, + "rewards_train/rejected": -3.833202838897705, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -80.65446472167969, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -29.85424041748047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.665446758270264, + "rewards_train/margins": -3.505022644996643, + "rewards_train/rejected": -1.1604241132736206, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -81.27935028076172, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -35.232826232910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2779350280761719, + "rewards_train/margins": 0.45784759521484375, + "rewards_train/rejected": -1.7357826232910156, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -90.1794662475586, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -114.06090545654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5679466128349304, + "rewards_train/margins": 2.688144028186798, + "rewards_train/rejected": -3.2560906410217285, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -57.49473190307617, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -81.0776138305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10052680969238281, + "rewards_train/margins": 2.433288335800171, + "rewards_train/rejected": -2.332761526107788, + "step": 1670 + }, + { + "epoch": 0.47, + "logps_train/chosen": -80.50042724609375, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -93.4180679321289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.700042724609375, + "rewards_train/margins": 0.6917641162872314, + "rewards_train/rejected": -1.3918068408966064, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -26.619611740112305, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -42.08354949951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9119611978530884, + "rewards_train/margins": 1.333893895149231, + "rewards_train/rejected": -2.2458550930023193, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -133.98826599121094, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -232.5106964111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7988266944885254, + "rewards_train/margins": 4.752243518829346, + "rewards_train/rejected": -8.551070213317871, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -203.16021728515625, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -282.01617431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.216021776199341, + "rewards_train/margins": 7.685596227645874, + "rewards_train/rejected": -9.901618003845215, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -29.679950714111328, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -43.35480499267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7929950952529907, + "rewards_train/margins": 2.779985547065735, + "rewards_train/rejected": -3.5729806423187256, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -157.694091796875, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -158.9916229248047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.969409465789795, + "rewards_train/margins": -0.32024717330932617, + "rewards_train/rejected": -4.649162292480469, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -58.26850128173828, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -101.93341064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.851850152015686, + "rewards_train/margins": 1.0914909839630127, + "rewards_train/rejected": -1.9433411359786987, + "step": 1671 + }, + { + "epoch": 0.47, + "logps_train/chosen": -110.45524597167969, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -203.39996337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4455246925354004, + "rewards_train/margins": 3.8944716453552246, + "rewards_train/rejected": -6.339996337890625, + "step": 1671 + }, + { + "epoch": 0.47, + "learning_rate": 5.421832510577882e-07, + "loss": 0.4138, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -74.77330780029297, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -117.71112823486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2523307800292969, + "rewards_train/margins": 1.4187819957733154, + "rewards_train/rejected": -2.6711127758026123, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -43.13325119018555, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -91.21141052246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1758251190185547, + "rewards_train/margins": 1.8203160762786865, + "rewards_train/rejected": -2.996141195297241, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -182.48654174804688, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -167.86622619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.848654270172119, + "rewards_train/margins": 2.9879684448242188, + "rewards_train/rejected": -5.836622714996338, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -64.4715347290039, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -103.75653839111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8221534490585327, + "rewards_train/margins": 0.9535003900527954, + "rewards_train/rejected": -2.775653839111328, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -39.939117431640625, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -32.38626480102539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.768911838531494, + "rewards_train/margins": -0.1209104061126709, + "rewards_train/rejected": -2.6480014324188232, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -6.748284339904785, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -29.643024444580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47482845187187195, + "rewards_train/margins": 1.1269740164279938, + "rewards_train/rejected": -1.6018024682998657, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -9.686352729797363, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -38.2015266418457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5530102849006653, + "rewards_train/margins": 2.5296425223350525, + "rewards_train/rejected": -3.0826528072357178, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -19.592918395996094, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -26.440139770507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6280418634414673, + "rewards_train/margins": 0.5315972566604614, + "rewards_train/rejected": -2.1596391201019287, + "step": 1672 + }, + { + "epoch": 0.47, + "logps_train/chosen": -2.9879722595214844, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -13.793410301208496, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.009015274234116077, + "rewards_train/margins": -0.20539370458573103, + "rewards_train/rejected": 0.2144089788198471, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -114.68826293945312, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -149.4158935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.118826389312744, + "rewards_train/margins": 3.4727630615234375, + "rewards_train/rejected": -5.591589450836182, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -120.1935806274414, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -132.1802215576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5193580389022827, + "rewards_train/margins": 3.7486640214920044, + "rewards_train/rejected": -5.268022060394287, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -94.1196060180664, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -127.84461975097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2619606256484985, + "rewards_train/margins": 2.6725014448165894, + "rewards_train/rejected": -3.934462070465088, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -19.49356460571289, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -1.265625, + "logps_train/rejected": -18.127342224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.674356460571289, + "rewards_train/margins": 0.011815309524536133, + "rewards_train/rejected": -1.6861717700958252, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -22.82154655456543, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -51.752464294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.113404631614685, + "rewards_train/margins": 3.011841893196106, + "rewards_train/rejected": -4.125246524810791, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -115.8081283569336, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -172.66458129882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0308128595352173, + "rewards_train/margins": 5.285645365715027, + "rewards_train/rejected": -6.316458225250244, + "step": 1673 + }, + { + "epoch": 0.47, + "logps_train/chosen": -18.34461212158203, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -43.763092041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.628211259841919, + "rewards_train/margins": 1.5543479919433594, + "rewards_train/rejected": -3.1825592517852783, + "step": 1673 + }, + { + "epoch": 0.47, + "learning_rate": 5.398328372144223e-07, + "loss": 0.2607, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -23.80013084411621, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -121.74786376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.998763084411621, + "rewards_train/margins": 2.576023578643799, + "rewards_train/rejected": -4.57478666305542, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -29.254764556884766, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -66.8534164428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4754765033721924, + "rewards_train/margins": 3.2473652362823486, + "rewards_train/rejected": -4.722841739654541, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -36.168922424316406, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -97.39322662353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2606422901153564, + "rewards_train/margins": 1.7786805629730225, + "rewards_train/rejected": -4.039322853088379, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -108.58141326904297, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -296.00750732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9081413745880127, + "rewards_train/margins": 14.192610502243042, + "rewards_train/rejected": -17.100751876831055, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -9.086320877075195, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -1.1015625, + "logps_train/rejected": -17.750925064086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03511791303753853, + "rewards_train/margins": 1.7000542171299458, + "rewards_train/rejected": -1.6649363040924072, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -127.61486053466797, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -309.08343505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7114861011505127, + "rewards_train/margins": 11.796857595443726, + "rewards_train/rejected": -15.508343696594238, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -118.54595947265625, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -263.3699035644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.754595994949341, + "rewards_train/margins": 7.882394552230835, + "rewards_train/rejected": -10.636990547180176, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -129.83123779296875, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -188.4507293701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6331238746643066, + "rewards_train/margins": 4.411949634552002, + "rewards_train/rejected": -8.045073509216309, + "step": 1674 + }, + { + "epoch": 0.47, + "logps_train/chosen": -94.79307556152344, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -266.7174072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.929307699203491, + "rewards_train/margins": 10.842433214187622, + "rewards_train/rejected": -13.771740913391113, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -206.70851135253906, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -229.90557861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8708512783050537, + "rewards_train/margins": 5.419706583023071, + "rewards_train/rejected": -8.290557861328125, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -6.048965930938721, + "logps_train/ref_chosen": -0.703125, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -4.691488265991211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.534584105014801, + "rewards_train/margins": -0.26543527841567993, + "rewards_train/rejected": -0.2691488265991211, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -25.55376434326172, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -34.63324737548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.149126410484314, + "rewards_train/margins": 0.03919839859008789, + "rewards_train/rejected": -1.1883248090744019, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -12.52940559387207, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -7.158267498016357, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.837315559387207, + "rewards_train/margins": -0.5074262917041779, + "rewards_train/rejected": -0.3298892676830292, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -119.042724609375, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -183.62457275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5542725324630737, + "rewards_train/margins": 2.9081846475601196, + "rewards_train/rejected": -4.462457180023193, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -0.496391236782074, + "logps_train/ref_chosen": -0.58203125, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -2.084853410720825, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008564001880586147, + "rewards_train/margins": -0.029825658537447453, + "rewards_train/rejected": 0.0383896604180336, + "step": 1675 + }, + { + "epoch": 0.47, + "logps_train/chosen": -14.805479049682617, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -17.251916885375977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14304791390895844, + "rewards_train/margins": 0.7071437984704971, + "rewards_train/rejected": -0.8501917123794556, + "step": 1675 + }, + { + "epoch": 0.47, + "learning_rate": 5.374856440533078e-07, + "loss": 0.2563, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -290.1817626953125, + "logps_train/ref_chosen": -220.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -160.4767303466797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.018176555633545, + "rewards_train/margins": -4.470503568649292, + "rewards_train/rejected": -2.547672986984253, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -107.07734680175781, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -188.37982177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1577346324920654, + "rewards_train/margins": 4.480247735977173, + "rewards_train/rejected": -6.637982368469238, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -35.77778625488281, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -23.025556564331055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4527785778045654, + "rewards_train/margins": -1.2064728736877441, + "rewards_train/rejected": -1.2463057041168213, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -93.91647338867188, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -157.626220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7416473627090454, + "rewards_train/margins": 1.0209747552871704, + "rewards_train/rejected": -1.7626221179962158, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -17.55684471130371, + "logps_train/ref_chosen": -1.703125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -26.074766159057617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.585371971130371, + "rewards_train/margins": -0.8403953313827515, + "rewards_train/rejected": -0.7449766397476196, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -180.71389770507812, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -213.18411254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.9713897705078125, + "rewards_train/margins": 3.2470216751098633, + "rewards_train/rejected": -8.218411445617676, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -22.425586700439453, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -63.38749694824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1425586938858032, + "rewards_train/margins": 1.8211909532546997, + "rewards_train/rejected": -2.963749647140503, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -110.88174438476562, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -137.5239715576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2381744384765625, + "rewards_train/margins": 1.9142227172851562, + "rewards_train/rejected": -2.1523971557617188, + "step": 1676 + }, + { + "epoch": 0.47, + "logps_train/chosen": -113.65657043457031, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -137.6307830810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5656570792198181, + "rewards_train/margins": 0.9474212527275085, + "rewards_train/rejected": -1.5130783319473267, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -104.52708435058594, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -183.1158447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.102708578109741, + "rewards_train/margins": 5.308875799179077, + "rewards_train/rejected": -7.411584377288818, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -13.841533660888672, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -9.186286926269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.37165337800979614, + "rewards_train/margins": -0.10927468538284302, + "rewards_train/rejected": -0.2623786926269531, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -17.065372467041016, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -55.275550842285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6252872347831726, + "rewards_train/margins": 1.8272679448127747, + "rewards_train/rejected": -2.4525551795959473, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -74.77455139160156, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -145.75157165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.302455186843872, + "rewards_train/margins": 0.6227020025253296, + "rewards_train/rejected": -1.9251571893692017, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -111.95234680175781, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -108.32830047607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8952347040176392, + "rewards_train/margins": 2.637595295906067, + "rewards_train/rejected": -3.532829999923706, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -274.0362548828125, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -255.808837890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.703625679016113, + "rewards_train/margins": -2.9227418899536133, + "rewards_train/rejected": -6.7808837890625, + "step": 1677 + }, + { + "epoch": 0.47, + "logps_train/chosen": -136.30422973632812, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -206.38522338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4804229736328125, + "rewards_train/margins": 2.5580992698669434, + "rewards_train/rejected": -5.038522243499756, + "step": 1677 + }, + { + "epoch": 0.47, + "learning_rate": 5.351416880023078e-07, + "loss": 0.7862, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -6.823106288909912, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -11.754825592041016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20731063187122345, + "rewards_train/margins": 0.31192193925380707, + "rewards_train/rejected": -0.5192325711250305, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -187.19485473632812, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -238.21267700195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.919485569000244, + "rewards_train/margins": 3.8017821311950684, + "rewards_train/rejected": -8.721267700195312, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -142.5922088623047, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -190.9758758544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.059221029281616, + "rewards_train/margins": 4.188366651535034, + "rewards_train/rejected": -7.24758768081665, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -5.459925651550293, + "logps_train/ref_chosen": -1.546875, + "logps_train/ref_rejected": -1.8359375, + "logps_train/rejected": -1.8908137083053589, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3913050591945648, + "rewards_train/margins": -0.38581743836402893, + "rewards_train/rejected": -0.005487620830535889, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -132.98550415039062, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -167.144775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.09855055809021, + "rewards_train/margins": 4.915927171707153, + "rewards_train/rejected": -7.014477729797363, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -26.89419937133789, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -2.171875, + "logps_train/rejected": -34.35260772705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1269199848175049, + "rewards_train/margins": 2.091153383255005, + "rewards_train/rejected": -3.2180733680725098, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -43.651737213134766, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -43.0, + "logps_train/rejected": -61.577327728271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24017372727394104, + "rewards_train/margins": 1.6175590455532074, + "rewards_train/rejected": -1.8577327728271484, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -20.36009979248047, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -107.63001251220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7547599673271179, + "rewards_train/margins": 3.358241379261017, + "rewards_train/rejected": -4.113001346588135, + "step": 1678 + }, + { + "epoch": 0.47, + "logps_train/chosen": -10.599336624145508, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -27.460641860961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4568086564540863, + "rewards_train/margins": 1.664255529642105, + "rewards_train/rejected": -2.1210641860961914, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -4.860195636749268, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -32.9410514831543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22648043930530548, + "rewards_train/margins": 2.358085587620735, + "rewards_train/rejected": -2.1316051483154297, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -49.8665657043457, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -23.79288101196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1866565942764282, + "rewards_train/margins": 0.5395065546035767, + "rewards_train/rejected": -1.7261631488800049, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -20.454547882080078, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -20.696449279785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8704547882080078, + "rewards_train/margins": 0.3929401636123657, + "rewards_train/rejected": -1.2633949518203735, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -83.43495178222656, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -82.94973754882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3934952020645142, + "rewards_train/margins": -0.04852139949798584, + "rewards_train/rejected": -1.3449738025665283, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -144.38507080078125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -176.96414184570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.638507068157196, + "rewards_train/margins": 4.40790730714798, + "rewards_train/rejected": -5.046414375305176, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -169.93307495117188, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -149.47100830078125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.943307399749756, + "rewards_train/margins": -0.0962066650390625, + "rewards_train/rejected": -5.847100734710693, + "step": 1679 + }, + { + "epoch": 0.47, + "logps_train/chosen": -103.33982849121094, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -103.67935943603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6339828372001648, + "rewards_train/margins": 0.03395313024520874, + "rewards_train/rejected": -0.6679359674453735, + "step": 1679 + }, + { + "epoch": 0.47, + "learning_rate": 5.328009854666302e-07, + "loss": 0.326, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -182.94932556152344, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -231.36660766601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.294932842254639, + "rewards_train/margins": 0.34172821044921875, + "rewards_train/rejected": -5.636661052703857, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -11.963703155517578, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -52.208290100097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48699530959129333, + "rewards_train/margins": 1.7088338434696198, + "rewards_train/rejected": -2.195829153060913, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -100.39236450195312, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -57.955196380615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1392364501953125, + "rewards_train/margins": 3.743783473968506, + "rewards_train/rejected": -4.883019924163818, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -2.764075994491577, + "logps_train/ref_chosen": -0.40234375, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -4.674307346343994, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23617322742938995, + "rewards_train/margins": -0.04999248683452606, + "rewards_train/rejected": -0.1861807405948639, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -165.6116943359375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -332.8544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.26116943359375, + "rewards_train/margins": 13.82427978515625, + "rewards_train/rejected": -16.08544921875, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -98.82821655273438, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -138.84274291992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0328216552734375, + "rewards_train/margins": 2.05145263671875, + "rewards_train/rejected": -3.0842742919921875, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -119.36561584472656, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -133.60128784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.086561679840088, + "rewards_train/margins": 2.723567008972168, + "rewards_train/rejected": -5.810128688812256, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -13.16423225402832, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -14.541313171386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27892324328422546, + "rewards_train/margins": 0.6252081096172333, + "rewards_train/rejected": -0.9041313529014587, + "step": 1680 + }, + { + "epoch": 0.47, + "logps_train/chosen": -176.26260375976562, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -226.5937957763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.12626051902771, + "rewards_train/margins": 1.0331192016601562, + "rewards_train/rejected": -3.159379720687866, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -2.5962512493133545, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -18.585742950439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06224987655878067, + "rewards_train/margins": 1.3833242431282997, + "rewards_train/rejected": -1.321074366569519, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -54.68778610229492, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -193.81553649902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4687786102294922, + "rewards_train/margins": 1.9127750396728516, + "rewards_train/rejected": -3.3815536499023438, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -35.090885162353516, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -19.95846176147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9840885400772095, + "rewards_train/margins": 0.19925761222839355, + "rewards_train/rejected": -1.183346152305603, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -55.30875015258789, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -49.47358703613281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5558750629425049, + "rewards_train/margins": -0.2835162878036499, + "rewards_train/rejected": -1.272358775138855, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -189.21090698242188, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -165.5721435546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.0710906982421875, + "rewards_train/margins": -1.2638764381408691, + "rewards_train/rejected": -5.807214260101318, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -118.28638458251953, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -139.46734619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7286384701728821, + "rewards_train/margins": 4.768096148967743, + "rewards_train/rejected": -5.496734619140625, + "step": 1681 + }, + { + "epoch": 0.47, + "logps_train/chosen": -161.3701171875, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -246.85304260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.837011814117432, + "rewards_train/margins": 6.248292446136475, + "rewards_train/rejected": -11.085304260253906, + "step": 1681 + }, + { + "epoch": 0.47, + "learning_rate": 5.304635528287112e-07, + "loss": 0.3556, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -123.26688385009766, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -239.7118682861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4266884326934814, + "rewards_train/margins": 8.544498205184937, + "rewards_train/rejected": -11.971186637878418, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -47.454856872558594, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -50.30546188354492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.132985830307007, + "rewards_train/margins": 1.2725603580474854, + "rewards_train/rejected": -3.405546188354492, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -18.99416732788086, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -27.606861114501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0556668043136597, + "rewards_train/margins": 0.11751937866210938, + "rewards_train/rejected": -1.173186182975769, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -164.37620544433594, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -163.9963836669922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3376206159591675, + "rewards_train/margins": -0.03798222541809082, + "rewards_train/rejected": -1.2996383905410767, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -133.7884063720703, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -280.26654052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.453840732574463, + "rewards_train/margins": 7.572813510894775, + "rewards_train/rejected": -15.026654243469238, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -16.174226760864258, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -20.440881729125977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10492267459630966, + "rewards_train/margins": 0.16416550427675247, + "rewards_train/rejected": -0.26908817887306213, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -138.99346923828125, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -178.03408813476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.999346911907196, + "rewards_train/margins": 5.40406209230423, + "rewards_train/rejected": -6.403409004211426, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -10.64258861541748, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -3.822951555252075, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.504883885383606, + "rewards_train/margins": -0.40540122985839844, + "rewards_train/rejected": -0.09948265552520752, + "step": 1682 + }, + { + "epoch": 0.47, + "logps_train/chosen": -90.11672973632812, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -142.2318572998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9616729617118835, + "rewards_train/margins": 5.161512672901154, + "rewards_train/rejected": -6.123185634613037, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -53.89128112792969, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -34.9552116394043, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8266282081604004, + "rewards_train/margins": -1.4061070680618286, + "rewards_train/rejected": -1.4205211400985718, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -191.9351806640625, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -222.6297607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.093518257141113, + "rewards_train/margins": 5.169458389282227, + "rewards_train/rejected": -9.26297664642334, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -186.1295928955078, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -179.41311645507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.962959289550781, + "rewards_train/margins": 0.9283523559570312, + "rewards_train/rejected": -8.891311645507812, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -186.89889526367188, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -330.8360595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8898894786834717, + "rewards_train/margins": 12.193716287612915, + "rewards_train/rejected": -15.083605766296387, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -114.15198516845703, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -220.80642700195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8151986598968506, + "rewards_train/margins": 10.415444612503052, + "rewards_train/rejected": -13.230643272399902, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -119.1435546875, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -161.89480590820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.564355492591858, + "rewards_train/margins": 2.4751251935958862, + "rewards_train/rejected": -4.039480686187744, + "step": 1683 + }, + { + "epoch": 0.47, + "logps_train/chosen": -99.70948791503906, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -200.94593811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.320948839187622, + "rewards_train/margins": 4.2736451625823975, + "rewards_train/rejected": -6.5945940017700195, + "step": 1683 + }, + { + "epoch": 0.47, + "learning_rate": 5.281294064481006e-07, + "loss": 0.3247, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -23.853904724121094, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -42.098350524902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6228904724121094, + "rewards_train/margins": 1.524444580078125, + "rewards_train/rejected": -2.1473350524902344, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -121.86370086669922, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -44.2888298034668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8363702297210693, + "rewards_train/margins": 0.02376270294189453, + "rewards_train/rejected": -2.860132932662964, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -47.094879150390625, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -115.33757019042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5844879150390625, + "rewards_train/margins": 2.349269151687622, + "rewards_train/rejected": -2.9337570667266846, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -24.86489486694336, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -57.0946044921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5489895343780518, + "rewards_train/margins": -1.0395290851593018, + "rewards_train/rejected": -0.50946044921875, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -8.214229583740234, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -20.44892120361328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.29329797625541687, + "rewards_train/margins": -0.14840584993362427, + "rewards_train/rejected": -0.1448921263217926, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -119.8125991821289, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -155.8207550048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.231259822845459, + "rewards_train/margins": 1.6508159637451172, + "rewards_train/rejected": -5.882075786590576, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -67.890625, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -38.188018798828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2640626430511475, + "rewards_train/margins": -2.395260751247406, + "rewards_train/rejected": -0.8688018918037415, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -15.066642761230469, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -1.5390625, + "logps_train/rejected": -19.167978286743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3254142701625824, + "rewards_train/margins": 1.4374773800373077, + "rewards_train/rejected": -1.7628916501998901, + "step": 1684 + }, + { + "epoch": 0.47, + "logps_train/chosen": -63.271484375, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -34.027618408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4771484434604645, + "rewards_train/margins": 1.6131134927272797, + "rewards_train/rejected": -2.090261936187744, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -161.18739318847656, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -189.63681030273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9187393188476562, + "rewards_train/margins": 3.7449417114257812, + "rewards_train/rejected": -5.6636810302734375, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -113.15937805175781, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -117.19959259033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26593780517578125, + "rewards_train/margins": 0.0540214478969574, + "rewards_train/rejected": -0.31995925307273865, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -75.39814758300781, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -79.74617004394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6148147583007812, + "rewards_train/margins": 1.0098023414611816, + "rewards_train/rejected": -2.624617099761963, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -178.34861755371094, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -204.78135681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.934861898422241, + "rewards_train/margins": 0.04327392578125, + "rewards_train/rejected": -3.978135824203491, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -11.241533279418945, + "logps_train/ref_chosen": -1.2421875, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -21.447391510009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9999346137046814, + "rewards_train/margins": 0.899492084980011, + "rewards_train/rejected": -1.8994266986846924, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -190.49209594726562, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -222.58753967285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2492096424102783, + "rewards_train/margins": 7.009544134140015, + "rewards_train/rejected": -9.258753776550293, + "step": 1685 + }, + { + "epoch": 0.47, + "logps_train/chosen": -26.71837043762207, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -34.55646514892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4233996868133545, + "rewards_train/margins": 0.2759969234466553, + "rewards_train/rejected": -2.6993966102600098, + "step": 1685 + }, + { + "epoch": 0.47, + "learning_rate": 5.25798562661348e-07, + "loss": 0.5434, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -20.9769344329834, + "logps_train/ref_chosen": -1.5234375, + "logps_train/ref_rejected": -3.65625, + "logps_train/rejected": -24.66200828552246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9453496932983398, + "rewards_train/margins": 0.1552262306213379, + "rewards_train/rejected": -2.1005759239196777, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -18.837032318115234, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -35.76551055908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5337032079696655, + "rewards_train/margins": 1.105347990989685, + "rewards_train/rejected": -2.6390511989593506, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -100.99805450439453, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -59.5769157409668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6498055458068848, + "rewards_train/margins": 1.5078859329223633, + "rewards_train/rejected": -4.157691478729248, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -217.5458526611328, + "logps_train/ref_chosen": -196.0, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -89.8773193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.154585361480713, + "rewards_train/margins": 0.03314661979675293, + "rewards_train/rejected": -2.187731981277466, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -102.25532531738281, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -75.896728515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.925532817840576, + "rewards_train/margins": -2.385859966278076, + "rewards_train/rejected": -3.5396728515625, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -48.47162628173828, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -136.97152709960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1528373807668686, + "rewards_train/margins": 8.099989995360374, + "rewards_train/rejected": -7.947152614593506, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -38.596744537353516, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -24.877899169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3721745014190674, + "rewards_train/margins": 0.7796778678894043, + "rewards_train/rejected": -2.1518523693084717, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -34.440147399902344, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -21.13360595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7065147757530212, + "rewards_train/margins": 0.22559583187103271, + "rewards_train/rejected": -0.932110607624054, + "step": 1686 + }, + { + "epoch": 0.47, + "logps_train/chosen": -88.4305419921875, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -35.65655517578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.043054103851318, + "rewards_train/margins": -2.2461485862731934, + "rewards_train/rejected": -2.796905517578125, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -19.101146697998047, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -12.904051780700684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8163647055625916, + "rewards_train/margins": 0.02404046058654785, + "rewards_train/rejected": -0.8404051661491394, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -22.995121002197266, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -30.709348678588867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9682620763778687, + "rewards_train/margins": 0.7432979345321655, + "rewards_train/rejected": -2.711560010910034, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -23.847854614257812, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -2.421875, + "logps_train/rejected": -16.188283920288086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.566035509109497, + "rewards_train/margins": -0.18939459323883057, + "rewards_train/rejected": -1.3766409158706665, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -166.3367919921875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -231.1199188232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.533679246902466, + "rewards_train/margins": 5.078312635421753, + "rewards_train/rejected": -8.611991882324219, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -193.0509033203125, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -225.0, + "logps_train/rejected": -268.27349853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3050904273986816, + "rewards_train/margins": 1.0222597122192383, + "rewards_train/rejected": -4.32735013961792, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -135.4649200439453, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -152.46043395996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8964920043945312, + "rewards_train/margins": 2.749551296234131, + "rewards_train/rejected": -4.646043300628662, + "step": 1687 + }, + { + "epoch": 0.47, + "logps_train/chosen": -97.64323425292969, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -151.7365264892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2143234014511108, + "rewards_train/margins": 1.9593292474746704, + "rewards_train/rejected": -3.1736526489257812, + "step": 1687 + }, + { + "epoch": 0.47, + "learning_rate": 5.234710377818894e-07, + "loss": 0.6214, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -283.4337463378906, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -194.46849060058594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.443374633789062, + "rewards_train/margins": -2.3965253829956055, + "rewards_train/rejected": -6.046849250793457, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -87.53350830078125, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -181.5740509033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.703350782394409, + "rewards_train/margins": 7.204054117202759, + "rewards_train/rejected": -9.907404899597168, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -144.6489715576172, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -227.04229736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.014897108078003, + "rewards_train/margins": 4.289332628250122, + "rewards_train/rejected": -6.304229736328125, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -245.32122802734375, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -251.48504638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.032122611999512, + "rewards_train/margins": 0.016382217407226562, + "rewards_train/rejected": -11.048504829406738, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -159.92111206054688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -194.6721954345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.892111301422119, + "rewards_train/margins": 2.9751081466674805, + "rewards_train/rejected": -5.8672194480896, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -25.200471878051758, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -26.74455451965332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02004718780517578, + "rewards_train/margins": 2.0887832641601562, + "rewards_train/rejected": -2.108830451965332, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -87.21756744384766, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -136.33302307128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.22175669670105, + "rewards_train/margins": 2.5615456104278564, + "rewards_train/rejected": -4.783302307128906, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -85.279541015625, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -33.64695358276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8779541254043579, + "rewards_train/margins": 2.1976786851882935, + "rewards_train/rejected": -3.0756328105926514, + "step": 1688 + }, + { + "epoch": 0.47, + "logps_train/chosen": -95.87240600585938, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -184.85104370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5872405767440796, + "rewards_train/margins": 7.4478641748428345, + "rewards_train/rejected": -9.035104751586914, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -97.53713989257812, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -37.221397399902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3537139892578125, + "rewards_train/margins": 1.6559257507324219, + "rewards_train/rejected": -2.0096397399902344, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -24.915096282958984, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -20.24742889404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6165096163749695, + "rewards_train/margins": 0.1769832968711853, + "rewards_train/rejected": -0.7934929132461548, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -26.387935638427734, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -39.861873626708984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8200435638427734, + "rewards_train/margins": -0.8338561654090881, + "rewards_train/rejected": -0.9861873984336853, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -5.685601234436035, + "logps_train/ref_chosen": -1.28125, + "logps_train/ref_rejected": -1.28125, + "logps_train/rejected": -5.485766887664795, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.44043514132499695, + "rewards_train/margins": -0.0199834406375885, + "rewards_train/rejected": -0.42045170068740845, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -39.185821533203125, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -87.68446350097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13141785562038422, + "rewards_train/margins": 2.6748641580343246, + "rewards_train/rejected": -2.5434463024139404, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -177.80987548828125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -217.08609008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.980987548828125, + "rewards_train/margins": 3.427621364593506, + "rewards_train/rejected": -7.408608913421631, + "step": 1689 + }, + { + "epoch": 0.47, + "logps_train/chosen": -12.041217803955078, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -3.0, + "logps_train/rejected": -7.380688190460205, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5666217803955078, + "rewards_train/margins": -0.12855294346809387, + "rewards_train/rejected": -0.43806883692741394, + "step": 1689 + }, + { + "epoch": 0.47, + "learning_rate": 5.211468480999304e-07, + "loss": 0.4415, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -292.98504638671875, + "logps_train/ref_chosen": -210.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -279.0932922363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.298504829406738, + "rewards_train/margins": 2.710824966430664, + "rewards_train/rejected": -11.009329795837402, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -48.37119674682617, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -106.76564025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41211968660354614, + "rewards_train/margins": 3.3644444346427917, + "rewards_train/rejected": -3.776564121246338, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -23.884809494018555, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -12.153998374938965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.168168544769287, + "rewards_train/margins": -1.6590186953544617, + "rewards_train/rejected": -0.5091498494148254, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -135.21722412109375, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -153.01773071289062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.071722507476807, + "rewards_train/margins": -0.3699493408203125, + "rewards_train/rejected": -4.701773166656494, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -161.1818084716797, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -231.9484100341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.618180751800537, + "rewards_train/margins": 2.3766608238220215, + "rewards_train/rejected": -8.994841575622559, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -19.2517032623291, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -1.984375, + "logps_train/rejected": -10.904133796691895, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8564203381538391, + "rewards_train/margins": 0.03555554151535034, + "rewards_train/rejected": -0.8919758796691895, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -175.6296844482422, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -213.5334930419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8629684448242188, + "rewards_train/margins": 3.090381145477295, + "rewards_train/rejected": -4.953349590301514, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -78.0843505859375, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -175.31309509277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.70843505859375, + "rewards_train/margins": 5.672874450683594, + "rewards_train/rejected": -6.381309509277344, + "step": 1690 + }, + { + "epoch": 0.47, + "logps_train/chosen": -41.04298400878906, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -65.98817443847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5542984008789062, + "rewards_train/margins": 2.457019329071045, + "rewards_train/rejected": -4.011317729949951, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -156.07815551757812, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -189.08590698242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5921844840049744, + "rewards_train/margins": 3.100775182247162, + "rewards_train/rejected": -2.5085906982421875, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -38.73207092285156, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -30.625, + "logps_train/rejected": -44.300357818603516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1607072353363037, + "rewards_train/margins": -0.7931714057922363, + "rewards_train/rejected": -1.3675358295440674, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -18.753292083740234, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -102.59449768066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5065792202949524, + "rewards_train/margins": 6.715370833873749, + "rewards_train/rejected": -7.221950054168701, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -85.79202270507812, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -65.96480560302734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6792022585868835, + "rewards_train/margins": -0.18272170424461365, + "rewards_train/rejected": -0.4964805543422699, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -25.776626586914062, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -24.93134117126465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5526626706123352, + "rewards_train/margins": 1.5060965418815613, + "rewards_train/rejected": -2.0587592124938965, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -36.484561920166016, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -37.522132873535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2672061920166016, + "rewards_train/margins": 1.0975072383880615, + "rewards_train/rejected": -3.364713430404663, + "step": 1691 + }, + { + "epoch": 0.47, + "logps_train/chosen": -153.8387451171875, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -207.34243774414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6338746547698975, + "rewards_train/margins": 3.8003690242767334, + "rewards_train/rejected": -6.434243679046631, + "step": 1691 + }, + { + "epoch": 0.47, + "learning_rate": 5.188260098823355e-07, + "loss": 0.3897, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -5.949985504150391, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -11.241630554199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36374855041503906, + "rewards_train/margins": 0.06666451692581177, + "rewards_train/rejected": -0.43041306734085083, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -121.31543731689453, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -51.893150329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7815437316894531, + "rewards_train/margins": 1.0827713012695312, + "rewards_train/rejected": -2.8643150329589844, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -79.84261322021484, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -115.2474136352539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3842613697052002, + "rewards_train/margins": -0.05952000617980957, + "rewards_train/rejected": -1.3247413635253906, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -29.122255325317383, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -13.094049453735352, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1997255086898804, + "rewards_train/margins": -0.8028205633163452, + "rewards_train/rejected": -0.39690494537353516, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -47.11222839355469, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -47.99355697631836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.236222982406616, + "rewards_train/margins": 0.8131327629089355, + "rewards_train/rejected": -3.0493557453155518, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -1.1047199964523315, + "logps_train/ref_chosen": -0.51953125, + "logps_train/ref_rejected": -0.51953125, + "logps_train/rejected": -1.1085455417633057, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.058518875390291214, + "rewards_train/margins": 0.0003825537860393524, + "rewards_train/rejected": -0.058901429176330566, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -5.574838161468506, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -6.69490909576416, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.27623382210731506, + "rewards_train/margins": -0.3754929155111313, + "rewards_train/rejected": 0.09925909340381622, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -34.182003021240234, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -86.0914306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8307002782821655, + "rewards_train/margins": 1.7534428834915161, + "rewards_train/rejected": -3.5841431617736816, + "step": 1692 + }, + { + "epoch": 0.47, + "logps_train/chosen": -71.90035247802734, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -71.80985260009766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.30996474623680115, + "rewards_train/margins": -0.00905001163482666, + "rewards_train/rejected": 0.3190147578716278, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -12.66944408416748, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -15.557336807250977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.770069420337677, + "rewards_train/margins": -0.0455857515335083, + "rewards_train/rejected": -0.7244836688041687, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -114.12879943847656, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -111.65641021728516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5128799676895142, + "rewards_train/margins": -0.9472389221191406, + "rewards_train/rejected": -0.5656410455703735, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -139.40708923339844, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -127.4408187866211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0407088994979858, + "rewards_train/margins": -0.1466270089149475, + "rewards_train/rejected": -0.8940818905830383, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -152.55377197265625, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -134.2397003173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7553772926330566, + "rewards_train/margins": 1.3685927391052246, + "rewards_train/rejected": -4.123970031738281, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -8.121121406555176, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -52.51708221435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5980496406555176, + "rewards_train/margins": 3.1411585807800293, + "rewards_train/rejected": -3.739208221435547, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -148.27529907226562, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -331.8700866699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6275298595428467, + "rewards_train/margins": 10.059478998184204, + "rewards_train/rejected": -12.68700885772705, + "step": 1693 + }, + { + "epoch": 0.47, + "logps_train/chosen": -80.36859130859375, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -85.52677917480469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.836859107017517, + "rewards_train/margins": -2.0841811895370483, + "rewards_train/rejected": 0.24732208251953125, + "step": 1693 + }, + { + "epoch": 0.47, + "learning_rate": 5.165085393725106e-07, + "loss": 0.6815, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -44.52119064331055, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -96.98591613769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5978809595108032, + "rewards_train/margins": 1.8964725732803345, + "rewards_train/rejected": -1.2985916137695312, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -48.19231414794922, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -37.48712158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5067315101623535, + "rewards_train/margins": 0.5919806957244873, + "rewards_train/rejected": -3.098712205886841, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -148.80899047851562, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -255.695556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.080899238586426, + "rewards_train/margins": 5.788656234741211, + "rewards_train/rejected": -10.869555473327637, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -160.82427978515625, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -179.68826293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.532428026199341, + "rewards_train/margins": 5.436398267745972, + "rewards_train/rejected": -8.968826293945312, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -104.11691284179688, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -92.7550048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.911691427230835, + "rewards_train/margins": 0.03880906105041504, + "rewards_train/rejected": -2.95050048828125, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -20.37361717224121, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -27.118192672729492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4998617172241211, + "rewards_train/margins": 0.8182076215744019, + "rewards_train/rejected": -1.318069338798523, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -89.080078125, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -150.80564880371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2080078125, + "rewards_train/margins": 2.322557210922241, + "rewards_train/rejected": -3.530565023422241, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -6.057729244232178, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -14.577984809875488, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17860208451747894, + "rewards_train/margins": 0.6114005595445633, + "rewards_train/rejected": -0.43279847502708435, + "step": 1694 + }, + { + "epoch": 0.47, + "logps_train/chosen": -103.05390930175781, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -179.35455322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1053909063339233, + "rewards_train/margins": 6.530064702033997, + "rewards_train/rejected": -7.63545560836792, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -80.9824447631836, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -120.9889907836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6982444524765015, + "rewards_train/margins": 3.7506548166275024, + "rewards_train/rejected": -5.448899269104004, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -162.20216369628906, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -224.8621063232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7202165126800537, + "rewards_train/margins": 3.8659942150115967, + "rewards_train/rejected": -6.58621072769165, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -166.58274841308594, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -268.93927001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.758274793624878, + "rewards_train/margins": 8.835652589797974, + "rewards_train/rejected": -11.593927383422852, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -0.12895676493644714, + "logps_train/ref_chosen": -0.2021484375, + "logps_train/ref_rejected": -0.2021484375, + "logps_train/rejected": -0.12893959879875183, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.007319167256355286, + "rewards_train/margins": -1.7168931663036346e-06, + "rewards_train/rejected": 0.007320884149521589, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -9.522143363952637, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -15.035651206970215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8186206221580505, + "rewards_train/margins": -0.771305501461029, + "rewards_train/rejected": -0.047315120697021484, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -17.87826919555664, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -30.67479705810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4253269135951996, + "rewards_train/margins": 2.004652887582779, + "rewards_train/rejected": -2.4299798011779785, + "step": 1695 + }, + { + "epoch": 0.47, + "logps_train/chosen": -209.96322631835938, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -322.2523193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.196322441101074, + "rewards_train/margins": 6.728909492492676, + "rewards_train/rejected": -14.92523193359375, + "step": 1695 + }, + { + "epoch": 0.47, + "learning_rate": 5.141944527902932e-07, + "loss": 0.2608, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -22.660621643066406, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -28.611061096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5785622000694275, + "rewards_train/margins": 0.22004389762878418, + "rewards_train/rejected": -0.7986060976982117, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -83.91129302978516, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -110.40635681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3911293148994446, + "rewards_train/margins": 4.249506652355194, + "rewards_train/rejected": -4.640635967254639, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -0.04581960290670395, + "logps_train/ref_chosen": -0.08642578125, + "logps_train/ref_rejected": -0.08642578125, + "logps_train/rejected": -0.04570341482758522, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0040606181137263775, + "rewards_train/margins": -1.1618714779615402e-05, + "rewards_train/rejected": 0.004072236828505993, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -93.2325439453125, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -94.63660430908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1732544898986816, + "rewards_train/margins": 1.6404058933258057, + "rewards_train/rejected": -3.8136603832244873, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -5.718400478363037, + "logps_train/ref_chosen": -0.470703125, + "logps_train/ref_rejected": -0.470703125, + "logps_train/rejected": -5.486185550689697, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5247697234153748, + "rewards_train/margins": -0.023221492767333984, + "rewards_train/rejected": -0.5015482306480408, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -17.796627044677734, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -20.93488311767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18591271340847015, + "rewards_train/margins": 0.9575755745172501, + "rewards_train/rejected": -1.1434882879257202, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -115.72604370117188, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -228.43226623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4226044416427612, + "rewards_train/margins": 7.720622181892395, + "rewards_train/rejected": -9.143226623535156, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -8.479693412780762, + "logps_train/ref_chosen": -1.75, + "logps_train/ref_rejected": -1.296875, + "logps_train/rejected": -9.510187149047852, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6729693412780762, + "rewards_train/margins": 0.14836186170578003, + "rewards_train/rejected": -0.8213312029838562, + "step": 1696 + }, + { + "epoch": 0.47, + "logps_train/chosen": -21.74331283569336, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -89.01126861572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2680813074111938, + "rewards_train/margins": 0.0830456018447876, + "rewards_train/rejected": -1.3511269092559814, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -241.4951171875, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -224.47482299804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.949512004852295, + "rewards_train/margins": 2.147970676422119, + "rewards_train/rejected": -10.097482681274414, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -119.07217407226562, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -200.47857666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1572173833847046, + "rewards_train/margins": 5.190640568733215, + "rewards_train/rejected": -6.34785795211792, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -10.659491539001465, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -28.573776245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5159491896629333, + "rewards_train/margins": 0.9101784825325012, + "rewards_train/rejected": -1.4261276721954346, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -40.093116760253906, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -30.5, + "logps_train/rejected": -41.32366180419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13431167602539062, + "rewards_train/margins": 0.9480545520782471, + "rewards_train/rejected": -1.0823662281036377, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -54.312294006347656, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -83.49443054199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5812294483184814, + "rewards_train/margins": 3.2682135105133057, + "rewards_train/rejected": -4.849442958831787, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -100.01736450195312, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -178.009521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1517364978790283, + "rewards_train/margins": 3.3992159366607666, + "rewards_train/rejected": -5.550952434539795, + "step": 1697 + }, + { + "epoch": 0.47, + "logps_train/chosen": -128.88778686523438, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -189.2850341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0887787342071533, + "rewards_train/margins": 1.039724588394165, + "rewards_train/rejected": -4.128503322601318, + "step": 1697 + }, + { + "epoch": 0.47, + "learning_rate": 5.118837663318351e-07, + "loss": 0.3085, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -33.72853088378906, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -32.666805267333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6853531002998352, + "rewards_train/margins": 1.4375774264335632, + "rewards_train/rejected": -2.1229305267333984, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -6.909272193908691, + "logps_train/ref_chosen": -3.3125, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -28.53248405456543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3596772253513336, + "rewards_train/margins": 1.306071251630783, + "rewards_train/rejected": -1.6657484769821167, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -0.09583094716072083, + "logps_train/ref_chosen": -0.2431640625, + "logps_train/ref_rejected": -0.2431640625, + "logps_train/rejected": -0.09418483823537827, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.014733311720192432, + "rewards_train/margins": -0.0001646103337407112, + "rewards_train/rejected": 0.014897922053933144, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -99.03146362304688, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -187.8509979248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5031464099884033, + "rewards_train/margins": 7.381953954696655, + "rewards_train/rejected": -10.885100364685059, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -12.242165565490723, + "logps_train/ref_chosen": -3.46875, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -23.026447296142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8773415684700012, + "rewards_train/margins": 1.1018657088279724, + "rewards_train/rejected": -1.9792072772979736, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -17.849123001098633, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -53.01882553100586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02758770063519478, + "rewards_train/margins": 2.2419703491032124, + "rewards_train/rejected": -2.2143826484680176, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -70.6493911743164, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -24.79218292236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8899391889572144, + "rewards_train/margins": 0.2049041986465454, + "rewards_train/rejected": -2.0948433876037598, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -134.7767333984375, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -204.94046020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.17767333984375, + "rewards_train/margins": 5.366373062133789, + "rewards_train/rejected": -9.544046401977539, + "step": 1698 + }, + { + "epoch": 0.47, + "logps_train/chosen": -98.39199829101562, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -91.93802642822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23919983208179474, + "rewards_train/margins": 2.0546029061079025, + "rewards_train/rejected": -2.2938027381896973, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -138.6009521484375, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -219.98617553710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.760095238685608, + "rewards_train/margins": 4.0385226011276245, + "rewards_train/rejected": -5.798617839813232, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -29.345462799072266, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -47.60797119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1970462799072266, + "rewards_train/margins": 0.7137508392333984, + "rewards_train/rejected": -1.910797119140625, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -29.74146270751953, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -94.17081451416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.117896318435669, + "rewards_train/margins": 2.3241851329803467, + "rewards_train/rejected": -4.442081451416016, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -178.40756225585938, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -175.6641845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5407562255859375, + "rewards_train/margins": 1.3756623268127441, + "rewards_train/rejected": -4.916418552398682, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -85.4450912475586, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -99.95693969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.169509172439575, + "rewards_train/margins": 0.3261847496032715, + "rewards_train/rejected": -2.4956939220428467, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -28.7479190826416, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -45.83848190307617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.068542003631592, + "rewards_train/margins": 2.1731185913085938, + "rewards_train/rejected": -4.2416605949401855, + "step": 1699 + }, + { + "epoch": 0.47, + "logps_train/chosen": -75.22080993652344, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -166.05279541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7720810174942017, + "rewards_train/margins": 3.5831984281539917, + "rewards_train/rejected": -5.355279445648193, + "step": 1699 + }, + { + "epoch": 0.48, + "learning_rate": 5.095764961694922e-07, + "loss": 0.2291, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -51.877071380615234, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -80.42513275146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.28729286789894104, + "rewards_train/margins": 3.404806286096573, + "rewards_train/rejected": -3.117513418197632, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.697936058044434, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -59.870296478271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4072936177253723, + "rewards_train/margins": 4.857861220836639, + "rewards_train/rejected": -5.265154838562012, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -119.30631256103516, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -108.7523422241211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2306312322616577, + "rewards_train/margins": 1.2946029901504517, + "rewards_train/rejected": -2.5252342224121094, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -197.46742248535156, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -249.81857299804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.5467424392700195, + "rewards_train/margins": 5.335115432739258, + "rewards_train/rejected": -10.881857872009277, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -38.830589294433594, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -53.50495147705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4580589234828949, + "rewards_train/margins": 3.9111861288547516, + "rewards_train/rejected": -4.3692450523376465, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -233.46701049804688, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -175.47830200195312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.846701622009277, + "rewards_train/margins": -0.7988710403442383, + "rewards_train/rejected": -8.047830581665039, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -59.30002975463867, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -34.13257598876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9300029873847961, + "rewards_train/margins": 1.0707547068595886, + "rewards_train/rejected": -2.0007576942443848, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -57.542137145996094, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -307.20745849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.154213786125183, + "rewards_train/margins": 13.566532254219055, + "rewards_train/rejected": -14.720746040344238, + "step": 1700 + }, + { + "epoch": 0.48, + "logps_train/chosen": -105.05453491210938, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -245.8337860107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6554535031318665, + "rewards_train/margins": 7.627925097942352, + "rewards_train/rejected": -8.283378601074219, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -126.93878173828125, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -189.37716674804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.393878221511841, + "rewards_train/margins": 1.943838357925415, + "rewards_train/rejected": -4.337716579437256, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -34.44993591308594, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -19.815082550048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9699935913085938, + "rewards_train/margins": 0.595889687538147, + "rewards_train/rejected": -1.5658832788467407, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -74.48812866210938, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -186.18783569335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1988128423690796, + "rewards_train/margins": 9.169971108436584, + "rewards_train/rejected": -10.368783950805664, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -19.362524032592773, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -40.86627197265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5862524509429932, + "rewards_train/margins": -0.23712527751922607, + "rewards_train/rejected": -1.349127173423767, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -1.0838207006454468, + "logps_train/ref_chosen": -0.11865234375, + "logps_train/ref_rejected": -0.11865234375, + "logps_train/rejected": -0.9357073307037354, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09651684015989304, + "rewards_train/margins": -0.014811336994171143, + "rewards_train/rejected": -0.0817055031657219, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -7.811100959777832, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -40.355411529541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5436100959777832, + "rewards_train/margins": 2.8638060092926025, + "rewards_train/rejected": -3.4074161052703857, + "step": 1701 + }, + { + "epoch": 0.48, + "logps_train/chosen": -28.544504165649414, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -41.53286361694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.004450559616089, + "rewards_train/margins": 1.667585849761963, + "rewards_train/rejected": -3.6720364093780518, + "step": 1701 + }, + { + "epoch": 0.48, + "learning_rate": 5.072726584517085e-07, + "loss": 0.2556, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -155.18301391601562, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -180.6603240966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.6183013916015625, + "rewards_train/margins": -0.8522689342498779, + "rewards_train/rejected": -3.7660324573516846, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -157.3924560546875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -206.00814819335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6392457485198975, + "rewards_train/margins": 1.2615692615509033, + "rewards_train/rejected": -4.900815010070801, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -131.25369262695312, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -163.41632080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5753692388534546, + "rewards_train/margins": 2.166262984275818, + "rewards_train/rejected": -3.7416322231292725, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -19.030954360961914, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -66.05940246582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9655954241752625, + "rewards_train/margins": 2.7528448700904846, + "rewards_train/rejected": -3.718440294265747, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -68.70396423339844, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -69.03810119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1296035796403885, + "rewards_train/margins": 0.03341370075941086, + "rewards_train/rejected": 0.09618987888097763, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -198.01724243164062, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -175.94747924804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0017242431640625, + "rewards_train/margins": 3.39302396774292, + "rewards_train/rejected": -6.394748210906982, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -17.7216796875, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -27.88222312927246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4065430164337158, + "rewards_train/margins": 0.12542927265167236, + "rewards_train/rejected": -1.5319722890853882, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -39.0167236328125, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -38.832427978515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1891725063323975, + "rewards_train/margins": -0.6809296607971191, + "rewards_train/rejected": -2.5082428455352783, + "step": 1702 + }, + { + "epoch": 0.48, + "logps_train/chosen": -42.76721954345703, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -33.252052307128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8142220973968506, + "rewards_train/margins": -1.02964186668396, + "rewards_train/rejected": -2.7845802307128906, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -96.21086120605469, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -210.25967407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7210861444473267, + "rewards_train/margins": 5.604881167411804, + "rewards_train/rejected": -7.325967311859131, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -146.2403106689453, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -195.6885986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6240310668945312, + "rewards_train/margins": 1.1448287963867188, + "rewards_train/rejected": -4.76885986328125, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -41.56163024902344, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -75.63127136230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0561630725860596, + "rewards_train/margins": 2.481964349746704, + "rewards_train/rejected": -4.538127422332764, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -173.099365234375, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -234.85757446289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.959936618804932, + "rewards_train/margins": 4.825821399688721, + "rewards_train/rejected": -9.785758018493652, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -114.17272186279297, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -165.2135467529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.867272138595581, + "rewards_train/margins": 3.754082441329956, + "rewards_train/rejected": -6.621354579925537, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -14.473832130432129, + "logps_train/ref_chosen": -3.46875, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -28.199024200439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.100508213043213, + "rewards_train/margins": 0.5881441831588745, + "rewards_train/rejected": -1.6886523962020874, + "step": 1703 + }, + { + "epoch": 0.48, + "logps_train/chosen": -101.8918228149414, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -161.0884246826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8391823172569275, + "rewards_train/margins": 2.4696601033210754, + "rewards_train/rejected": -3.308842420578003, + "step": 1703 + }, + { + "epoch": 0.48, + "learning_rate": 5.04972269302906e-07, + "loss": 0.3942, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -192.3095703125, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -220.577880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.73095703125, + "rewards_train/margins": 1.7268309593200684, + "rewards_train/rejected": -4.457787990570068, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -142.38351440429688, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -159.49720764160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6383514404296875, + "rewards_train/margins": 1.9113693237304688, + "rewards_train/rejected": -5.549720764160156, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -2.925874948501587, + "logps_train/ref_chosen": -1.578125, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -19.484336853027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13477499783039093, + "rewards_train/margins": 1.3167837113142014, + "rewards_train/rejected": -1.4515587091445923, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -105.39936065673828, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -108.47938537597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.039936065673828, + "rewards_train/margins": -2.191997528076172, + "rewards_train/rejected": -0.8479385375976562, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -96.34130096435547, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -202.97396850585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4841301143169403, + "rewards_train/margins": 6.7132666409015656, + "rewards_train/rejected": -7.197396755218506, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -269.7914733886719, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -251.2476043701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.579147338867188, + "rewards_train/margins": -1.7543869018554688, + "rewards_train/rejected": -9.824760437011719, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -5.409395217895508, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -42.16735076904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24875202775001526, + "rewards_train/margins": 2.2804830968379974, + "rewards_train/rejected": -2.5292351245880127, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -50.26305389404297, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -77.80658721923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4486946165561676, + "rewards_train/margins": 2.4043534100055695, + "rewards_train/rejected": -1.9556587934494019, + "step": 1704 + }, + { + "epoch": 0.48, + "logps_train/chosen": -176.6650848388672, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -205.2465057373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.716508388519287, + "rewards_train/margins": 0.90814208984375, + "rewards_train/rejected": -6.624650478363037, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -6.891360759735107, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -2.765625, + "logps_train/rejected": -14.138869285583496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3469485938549042, + "rewards_train/margins": 0.7903758585453033, + "rewards_train/rejected": -1.1373244524002075, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.85527229309082, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -27.625, + "logps_train/rejected": -38.16189956665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.648027241230011, + "rewards_train/margins": 0.4056627154350281, + "rewards_train/rejected": -1.053689956665039, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -3.9701640605926514, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -9.367464065551758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23061016201972961, + "rewards_train/margins": 0.2686362564563751, + "rewards_train/rejected": -0.49924641847610474, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -66.36045837402344, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -67.29443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5610458254814148, + "rewards_train/margins": 0.09339755773544312, + "rewards_train/rejected": -0.6544433832168579, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -176.76507568359375, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -154.8321533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.976507663726807, + "rewards_train/margins": 0.3567075729370117, + "rewards_train/rejected": -6.333215236663818, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -33.90727615356445, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -61.298744201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.440727710723877, + "rewards_train/margins": 2.089146614074707, + "rewards_train/rejected": -4.529874324798584, + "step": 1705 + }, + { + "epoch": 0.48, + "logps_train/chosen": -182.46466064453125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -188.43031311035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.04646635055542, + "rewards_train/margins": 0.34656524658203125, + "rewards_train/rejected": -6.393031597137451, + "step": 1705 + }, + { + "epoch": 0.48, + "learning_rate": 5.026753448233702e-07, + "loss": 0.5348, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -14.126314163208008, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -64.09192657470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03736858442425728, + "rewards_train/margins": 5.649686146527529, + "rewards_train/rejected": -5.6123175621032715, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -121.0007553100586, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -125.1700439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7500755786895752, + "rewards_train/margins": 1.5669288635253906, + "rewards_train/rejected": -3.317004442214966, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -44.9246826171875, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -99.92375946044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.808093309402466, + "rewards_train/margins": -0.915717363357544, + "rewards_train/rejected": -2.892375946044922, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -146.7508544921875, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -182.713623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.575085639953613, + "rewards_train/margins": 5.146276473999023, + "rewards_train/rejected": -9.721362113952637, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -82.68702697753906, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -203.60507202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7187026739120483, + "rewards_train/margins": 6.791804909706116, + "rewards_train/rejected": -8.510507583618164, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -122.34466552734375, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -52.973899841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1344666481018066, + "rewards_train/margins": 0.3754234313964844, + "rewards_train/rejected": -3.509890079498291, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.955646514892578, + "logps_train/ref_chosen": -0.37109375, + "logps_train/ref_rejected": -0.37109375, + "logps_train/rejected": -18.955753326416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8584553003311157, + "rewards_train/margins": 1.0728836059570312e-05, + "rewards_train/rejected": -1.8584660291671753, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -119.76901245117188, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -160.48849487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5269012451171875, + "rewards_train/margins": 3.1219482421875, + "rewards_train/rejected": -5.6488494873046875, + "step": 1706 + }, + { + "epoch": 0.48, + "logps_train/chosen": -153.5190887451172, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -206.92381286621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.80190896987915, + "rewards_train/margins": 3.390472888946533, + "rewards_train/rejected": -8.192381858825684, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -188.68838500976562, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -215.68399047851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.6688385009765625, + "rewards_train/margins": 2.549560546875, + "rewards_train/rejected": -10.218399047851562, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -1.3761314153671265, + "logps_train/ref_chosen": -0.84375, + "logps_train/ref_rejected": -0.76171875, + "logps_train/rejected": -2.1887457370758057, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.053238142281770706, + "rewards_train/margins": 0.08946455642580986, + "rewards_train/rejected": -0.14270269870758057, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -8.772562980651855, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -4.265798091888428, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.39288130402565, + "rewards_train/margins": -0.3756764940917492, + "rewards_train/rejected": -0.017204809933900833, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -251.99220275878906, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -241.93946838378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.099220275878906, + "rewards_train/margins": 0.3947267532348633, + "rewards_train/rejected": -10.49394702911377, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -186.45611572265625, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -187.37744140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.545611619949341, + "rewards_train/margins": -0.00786733627319336, + "rewards_train/rejected": -3.5377442836761475, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -144.74180603027344, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -145.02171325683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4741806983947754, + "rewards_train/margins": 0.02799057960510254, + "rewards_train/rejected": -2.502171277999878, + "step": 1707 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.31187915802002, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -12.401710510253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6921254396438599, + "rewards_train/margins": -0.023829400539398193, + "rewards_train/rejected": -0.6682960391044617, + "step": 1707 + }, + { + "epoch": 0.48, + "learning_rate": 5.003819010891366e-07, + "loss": 0.4349, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.367938995361328, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -26.30302619934082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5367938876152039, + "rewards_train/margins": 1.287258803844452, + "rewards_train/rejected": -1.8240526914596558, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -122.51220703125, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -121.42444610595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.951220750808716, + "rewards_train/margins": 0.041224002838134766, + "rewards_train/rejected": -2.9924447536468506, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -47.424835205078125, + "logps_train/ref_chosen": -36.75, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -35.94252014160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0674835443496704, + "rewards_train/margins": -0.5732315182685852, + "rewards_train/rejected": -0.4942520260810852, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -41.54676055908203, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -43.79750061035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.029676079750061, + "rewards_train/margins": 1.9625741243362427, + "rewards_train/rejected": -2.9922502040863037, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -21.322202682495117, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -28.097606658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8947202563285828, + "rewards_train/margins": 1.3056655526161194, + "rewards_train/rejected": -2.200385808944702, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -6.523248672485352, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -21.222476959228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3538873791694641, + "rewards_train/margins": 0.4683603048324585, + "rewards_train/rejected": -0.8222476840019226, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -221.10861206054688, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -212.88035583496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.710861206054688, + "rewards_train/margins": 1.9271745681762695, + "rewards_train/rejected": -12.638035774230957, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -107.8582763671875, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -141.70925903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.985827624797821, + "rewards_train/margins": 2.035098373889923, + "rewards_train/rejected": -3.020925998687744, + "step": 1708 + }, + { + "epoch": 0.48, + "logps_train/chosen": -20.959470748901367, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -28.711095809936523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6053221225738525, + "rewards_train/margins": 0.1657874584197998, + "rewards_train/rejected": -1.7711095809936523, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -120.76679992675781, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -188.7865753173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.37667989730835, + "rewards_train/margins": 2.8019776344299316, + "rewards_train/rejected": -7.178657531738281, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -130.78726196289062, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -131.7456817626953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0287262201309204, + "rewards_train/margins": -0.5541580319404602, + "rewards_train/rejected": -0.4745681881904602, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -139.6224822998047, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -209.51382446289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.912248134613037, + "rewards_train/margins": 4.239134311676025, + "rewards_train/rejected": -10.151382446289062, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -26.05384635925293, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -20.90414810180664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0678846836090088, + "rewards_train/margins": -0.6899698674678802, + "rewards_train/rejected": -0.37791481614112854, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -38.88159942626953, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -57.24720001220703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1131600141525269, + "rewards_train/margins": -0.21344000101089478, + "rewards_train/rejected": -0.8997200131416321, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -43.26140594482422, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -59.99161148071289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1948907375335693, + "rewards_train/margins": -0.6957294940948486, + "rewards_train/rejected": -2.4991612434387207, + "step": 1709 + }, + { + "epoch": 0.48, + "logps_train/chosen": -71.44696044921875, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -40.05851745605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9946960806846619, + "rewards_train/margins": 0.723655641078949, + "rewards_train/rejected": -1.7183517217636108, + "step": 1709 + }, + { + "epoch": 0.48, + "learning_rate": 4.980919541518795e-07, + "loss": 0.5096, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -23.63886260986328, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -46.85847473144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4201363325119019, + "rewards_train/margins": 2.7719613313674927, + "rewards_train/rejected": -4.1920976638793945, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -148.84423828125, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -203.1588592529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6844239234924316, + "rewards_train/margins": 3.1314620971679688, + "rewards_train/rejected": -6.8158860206604, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -77.14009857177734, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -85.60445404052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9140098690986633, + "rewards_train/margins": 1.3214356303215027, + "rewards_train/rejected": -2.235445499420166, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -165.13031005859375, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -208.34454345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.513031005859375, + "rewards_train/margins": 3.52142333984375, + "rewards_train/rejected": -5.034454345703125, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -131.2230987548828, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -230.4384002685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5223098993301392, + "rewards_train/margins": 7.721530318260193, + "rewards_train/rejected": -9.243840217590332, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -97.50980377197266, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -87.14112854003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2009804248809814, + "rewards_train/margins": -0.4618675708770752, + "rewards_train/rejected": -2.7391128540039062, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -109.41507720947266, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -158.71266174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4915077686309814, + "rewards_train/margins": 2.379758358001709, + "rewards_train/rejected": -3.8712661266326904, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -116.3957748413086, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -119.06749725341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8395774364471436, + "rewards_train/margins": 0.8171722888946533, + "rewards_train/rejected": -4.656749725341797, + "step": 1710 + }, + { + "epoch": 0.48, + "logps_train/chosen": -4.050144195556641, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -1.1796875, + "logps_train/rejected": -2.9634480476379395, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20188942551612854, + "rewards_train/margins": -0.02351336181163788, + "rewards_train/rejected": -0.17837606370449066, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -115.89065551757812, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -185.4862060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3390655517578125, + "rewards_train/margins": 3.609555244445801, + "rewards_train/rejected": -4.948620796203613, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -113.99884796142578, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -128.8654022216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09988480061292648, + "rewards_train/margins": 0.38665542751550674, + "rewards_train/rejected": -0.4865402281284332, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -104.62294006347656, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -155.57733154296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.262294054031372, + "rewards_train/margins": 2.645439386367798, + "rewards_train/rejected": -4.90773344039917, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -20.08152961730957, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -40.735023498535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6269029974937439, + "rewards_train/margins": 1.2215994000434875, + "rewards_train/rejected": -1.8485023975372314, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -250.34945678710938, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -237.1832275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.034945487976074, + "rewards_train/margins": 1.5833778381347656, + "rewards_train/rejected": -9.61832332611084, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -105.22027587890625, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -162.46348571777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.172027587890625, + "rewards_train/margins": 2.8243212699890137, + "rewards_train/rejected": -5.996348857879639, + "step": 1711 + }, + { + "epoch": 0.48, + "logps_train/chosen": -95.47813415527344, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -160.95797729492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6978133916854858, + "rewards_train/margins": 2.9479843378067017, + "rewards_train/rejected": -4.6457977294921875, + "step": 1711 + }, + { + "epoch": 0.48, + "learning_rate": 4.958055200388002e-07, + "loss": 0.2279, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -2.148737668991089, + "logps_train/ref_chosen": -1.375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -4.908271312713623, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07737376540899277, + "rewards_train/margins": -0.16779663413763046, + "rewards_train/rejected": 0.0904228687286377, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -136.16549682617188, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -111.0315933227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.066549777984619, + "rewards_train/margins": 0.6366095542907715, + "rewards_train/rejected": -4.703159332275391, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -7.813957691192627, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -2.59375, + "logps_train/rejected": -6.216375350952148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14702077209949493, + "rewards_train/margins": 0.21524177491664886, + "rewards_train/rejected": -0.3622625470161438, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -134.9122772216797, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -147.8855438232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.091228008270264, + "rewards_train/margins": 0.5473265647888184, + "rewards_train/rejected": -4.638554573059082, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -26.48246955871582, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -62.71573257446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.229496955871582, + "rewards_train/margins": 2.779576301574707, + "rewards_train/rejected": -4.009073257446289, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -206.2052764892578, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -193.50035095214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.120527744293213, + "rewards_train/margins": -0.2704925537109375, + "rewards_train/rejected": -4.850035190582275, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -128.6704559326172, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -199.528564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8670456409454346, + "rewards_train/margins": 6.235810995101929, + "rewards_train/rejected": -9.102856636047363, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -82.96463775634766, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -76.53790283203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0214638710021973, + "rewards_train/margins": -0.8676735162734985, + "rewards_train/rejected": -1.1537903547286987, + "step": 1712 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.387680053710938, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -37.7148323059082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4575180113315582, + "rewards_train/margins": 0.9389652907848358, + "rewards_train/rejected": -1.396483302116394, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -174.36505126953125, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -199.6015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.636505126953125, + "rewards_train/margins": 0.023651123046875, + "rewards_train/rejected": -6.66015625, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -187.01454162597656, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -199.2991943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0014543533325195, + "rewards_train/margins": 3.7784652709960938, + "rewards_train/rejected": -7.779919624328613, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -31.89464569091797, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -39.23678970336914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2269645929336548, + "rewards_train/margins": -0.2032855749130249, + "rewards_train/rejected": -1.0236790180206299, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -21.255146026611328, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -20.315826416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0880146026611328, + "rewards_train/margins": 0.5576305389404297, + "rewards_train/rejected": -1.6456451416015625, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -100.00070190429688, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -98.6993408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45007020235061646, + "rewards_train/margins": 0.3198639154434204, + "rewards_train/rejected": -0.7699341177940369, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -135.26229858398438, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -140.6869659423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4762299060821533, + "rewards_train/margins": 3.542466878890991, + "rewards_train/rejected": -5.0186967849731445, + "step": 1713 + }, + { + "epoch": 0.48, + "logps_train/chosen": -78.45108795166016, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -144.18731689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6201088428497314, + "rewards_train/margins": 2.198622941970825, + "rewards_train/rejected": -3.8187317848205566, + "step": 1713 + }, + { + "epoch": 0.48, + "learning_rate": 4.935226147525129e-07, + "loss": 0.4586, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -47.58197784423828, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -89.77462005615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.895697832107544, + "rewards_train/margins": 3.031764268875122, + "rewards_train/rejected": -4.927462100982666, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -15.087372779846191, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -21.13056755065918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.157174825668335, + "rewards_train/margins": -0.38161808252334595, + "rewards_train/rejected": -0.775556743144989, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -14.005370140075684, + "logps_train/ref_chosen": -1.265625, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -33.09667205810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2739745378494263, + "rewards_train/margins": 1.4606927633285522, + "rewards_train/rejected": -2.7346673011779785, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -60.686912536621094, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -170.01211547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.418691396713257, + "rewards_train/margins": 1.3325202465057373, + "rewards_train/rejected": -4.751211643218994, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -147.63665771484375, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -153.09426879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8136658668518066, + "rewards_train/margins": 0.7957611083984375, + "rewards_train/rejected": -4.609426975250244, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -130.3788604736328, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -203.1290740966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.3128862380981445, + "rewards_train/margins": 1.700021743774414, + "rewards_train/rejected": -9.012907981872559, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -153.31268310546875, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -205.47445678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.281268358230591, + "rewards_train/margins": 4.616177320480347, + "rewards_train/rejected": -7.8974456787109375, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -73.27167510986328, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -69.92022705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2271675169467926, + "rewards_train/margins": 1.1148552596569061, + "rewards_train/rejected": -1.3420227766036987, + "step": 1714 + }, + { + "epoch": 0.48, + "logps_train/chosen": -207.02650451660156, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -256.06695556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.502650499343872, + "rewards_train/margins": 2.3040449619293213, + "rewards_train/rejected": -5.806695461273193, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -64.36268615722656, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -61.44898223876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2637313902378082, + "rewards_train/margins": 0.9086296260356903, + "rewards_train/rejected": -0.6448982357978821, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -128.70932006835938, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -44.580623626708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2209320068359375, + "rewards_train/margins": 1.312130331993103, + "rewards_train/rejected": -1.5330623388290405, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -221.47874450683594, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -226.26632690429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.49787425994873, + "rewards_train/margins": 0.12875843048095703, + "rewards_train/rejected": -9.626632690429688, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -33.35670471191406, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -72.0829849243164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.879420518875122, + "rewards_train/margins": 1.9288780689239502, + "rewards_train/rejected": -3.8082985877990723, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -86.33485412597656, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -197.1629638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1334855556488037, + "rewards_train/margins": 6.18281102180481, + "rewards_train/rejected": -8.316296577453613, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -180.78652954101562, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -130.6571044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3786529302597046, + "rewards_train/margins": 1.3370574712753296, + "rewards_train/rejected": -2.715710401535034, + "step": 1715 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.633350372314453, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -41.571319580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6383350491523743, + "rewards_train/margins": 2.168796956539154, + "rewards_train/rejected": -2.8071320056915283, + "step": 1715 + }, + { + "epoch": 0.48, + "learning_rate": 4.912432542709344e-07, + "loss": 0.251, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -75.25164031982422, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -83.39021301269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2001640796661377, + "rewards_train/margins": -0.7111427783966064, + "rewards_train/rejected": -0.48902130126953125, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -17.919755935668945, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -41.78385925292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9857255816459656, + "rewards_train/margins": 2.411410391330719, + "rewards_train/rejected": -3.3971359729766846, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.55394744873047, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -13.444372177124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34914475679397583, + "rewards_train/margins": 0.4577924609184265, + "rewards_train/rejected": -0.8069372177124023, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -6.180625915527344, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -36.92878341674805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2415000945329666, + "rewards_train/margins": 0.9763782471418381, + "rewards_train/rejected": -1.2178783416748047, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -107.16104125976562, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -197.72930908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1661041975021362, + "rewards_train/margins": 6.606826901435852, + "rewards_train/rejected": -7.772931098937988, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -8.358123779296875, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -12.0, + "logps_train/rejected": -26.39977264404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5686249136924744, + "rewards_train/margins": 0.8713523745536804, + "rewards_train/rejected": -1.4399772882461548, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -72.84962463378906, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -73.06614685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3849624693393707, + "rewards_train/margins": 0.0216522216796875, + "rewards_train/rejected": -0.4066146910190582, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -87.81659698486328, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -150.215576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0316598415374756, + "rewards_train/margins": 4.039897680282593, + "rewards_train/rejected": -6.071557521820068, + "step": 1716 + }, + { + "epoch": 0.48, + "logps_train/chosen": -167.05087280273438, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -206.57785034179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.755087375640869, + "rewards_train/margins": 1.502697467803955, + "rewards_train/rejected": -8.257784843444824, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -12.03003978729248, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -29.353351593017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04050397872924805, + "rewards_train/margins": 2.0573313236236572, + "rewards_train/rejected": -2.0978353023529053, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -192.55072021484375, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -135.9512176513672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.05507230758667, + "rewards_train/margins": -1.8599505424499512, + "rewards_train/rejected": -4.195121765136719, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -31.2221622467041, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -38.454673767089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.115966320037842, + "rewards_train/margins": 1.32637619972229, + "rewards_train/rejected": -3.442342519760132, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -175.2032928466797, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -173.64419555664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6203293800354004, + "rewards_train/margins": 0.9940900802612305, + "rewards_train/rejected": -4.614419460296631, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.631307601928711, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -15.45661449432373, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3681192398071289, + "rewards_train/margins": 0.25128068774938583, + "rewards_train/rejected": 0.11683855205774307, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.01134204864502, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -12.797968864440918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4855092167854309, + "rewards_train/margins": 0.4630376696586609, + "rewards_train/rejected": -0.9485468864440918, + "step": 1717 + }, + { + "epoch": 0.48, + "logps_train/chosen": -129.9437255859375, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -155.83782958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4943726062774658, + "rewards_train/margins": 0.6894104480743408, + "rewards_train/rejected": -2.1837830543518066, + "step": 1717 + }, + { + "epoch": 0.48, + "learning_rate": 4.889674545471711e-07, + "loss": 0.4627, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -44.72278594970703, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -58.68592071533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4972786009311676, + "rewards_train/margins": 4.327563375234604, + "rewards_train/rejected": -4.8248419761657715, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -144.86952209472656, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -246.340576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1869522333145142, + "rewards_train/margins": 6.7471054792404175, + "rewards_train/rejected": -7.934057712554932, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -110.32777404785156, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -139.74362182617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.882777452468872, + "rewards_train/margins": 1.2915847301483154, + "rewards_train/rejected": -3.1743621826171875, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -176.78929138183594, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -206.75582885742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.678929090499878, + "rewards_train/margins": 4.24665379524231, + "rewards_train/rejected": -7.9255828857421875, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -65.93009948730469, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -146.6485595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8680099844932556, + "rewards_train/margins": 3.5968461632728577, + "rewards_train/rejected": -4.464856147766113, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -41.45451354980469, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -25.137226104736328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0579514503479004, + "rewards_train/margins": -0.012978792190551758, + "rewards_train/rejected": -2.0449726581573486, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -92.37442016601562, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -85.70864868164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7374420166015625, + "rewards_train/margins": 0.4834228754043579, + "rewards_train/rejected": -1.2208648920059204, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -30.46258544921875, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -19.15895652770996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0587586164474487, + "rewards_train/margins": 0.5024495124816895, + "rewards_train/rejected": -1.5612081289291382, + "step": 1718 + }, + { + "epoch": 0.48, + "logps_train/chosen": -183.59921264648438, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -177.6917724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.8099212646484375, + "rewards_train/margins": 0.5592560768127441, + "rewards_train/rejected": -7.369177341461182, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -89.9583511352539, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -130.5845489501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4458351135253906, + "rewards_train/margins": 3.062619924545288, + "rewards_train/rejected": -3.5084550380706787, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -159.0453643798828, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -183.25193786621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0045363903045654, + "rewards_train/margins": 2.32065749168396, + "rewards_train/rejected": -5.325193881988525, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -210.99679565429688, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -185.80615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.899679660797119, + "rewards_train/margins": 0.5809354782104492, + "rewards_train/rejected": -7.480615139007568, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -86.21709442138672, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -77.2048110961914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.096709728240967, + "rewards_train/margins": -0.7012286186218262, + "rewards_train/rejected": -3.3954811096191406, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -171.5590057373047, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -105.18208312988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.7059006690979, + "rewards_train/margins": 0.2873077392578125, + "rewards_train/rejected": -4.993208408355713, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -8.644317626953125, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -6.484897613525391, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3863067626953125, + "rewards_train/margins": -0.16906699538230896, + "rewards_train/rejected": -0.21723976731300354, + "step": 1719 + }, + { + "epoch": 0.48, + "logps_train/chosen": -21.45539665222168, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -49.54112243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4674147367477417, + "rewards_train/margins": 1.0116976499557495, + "rewards_train/rejected": -2.479112386703491, + "step": 1719 + }, + { + "epoch": 0.48, + "learning_rate": 4.866952315094087e-07, + "loss": 0.3593, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -157.15878295898438, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -160.14923095703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.615878582000732, + "rewards_train/margins": -1.1509554386138916, + "rewards_train/rejected": -3.464923143386841, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -59.0787353515625, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -46.53928756713867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.895373821258545, + "rewards_train/margins": -0.4945697784423828, + "rewards_train/rejected": -4.400804042816162, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.374177932739258, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -28.95478057861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8092927932739258, + "rewards_train/margins": 1.608060359954834, + "rewards_train/rejected": -2.4173531532287598, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -93.57196044921875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -111.42827606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24280396103858948, + "rewards_train/margins": 2.8356315195560455, + "rewards_train/rejected": -2.592827558517456, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -129.89125061035156, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -190.18414306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7391250133514404, + "rewards_train/margins": 3.829289197921753, + "rewards_train/rejected": -6.568414211273193, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -17.971439361572266, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -61.584259033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6487064361572266, + "rewards_train/margins": 1.2597196102142334, + "rewards_train/rejected": -2.90842604637146, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -192.7869415283203, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -190.94888305664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.778694152832031, + "rewards_train/margins": -0.48380565643310547, + "rewards_train/rejected": -7.294888496398926, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -146.20022583007812, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -208.05902099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8200225830078125, + "rewards_train/margins": 1.0858795642852783, + "rewards_train/rejected": -3.905902147293091, + "step": 1720 + }, + { + "epoch": 0.48, + "logps_train/chosen": -84.04901123046875, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -154.61329650878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.554901123046875, + "rewards_train/margins": 5.5564284324646, + "rewards_train/rejected": -6.111329555511475, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -248.81015014648438, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -231.1444854736328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.081015586853027, + "rewards_train/margins": -0.4665670394897461, + "rewards_train/rejected": -9.614448547363281, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -154.78265380859375, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -149.48812866210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22173462808132172, + "rewards_train/margins": 2.970547541975975, + "rewards_train/rejected": -2.7488129138946533, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -42.78538131713867, + "logps_train/ref_chosen": -40.25, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -19.836044311523438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2535381317138672, + "rewards_train/margins": -0.0699336975812912, + "rewards_train/rejected": -0.183604434132576, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -5.855315685272217, + "logps_train/ref_chosen": -5.59375, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -18.825307846069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02615656889975071, + "rewards_train/margins": 1.3657492157071829, + "rewards_train/rejected": -1.3919057846069336, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -168.14358520507812, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -240.16958618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.414358615875244, + "rewards_train/margins": 4.702600002288818, + "rewards_train/rejected": -10.116958618164062, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -46.57638931274414, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -17.895692825317383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9576389789581299, + "rewards_train/margins": -1.4555696845054626, + "rewards_train/rejected": -0.5020692944526672, + "step": 1721 + }, + { + "epoch": 0.48, + "logps_train/chosen": -18.481792449951172, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -48.73155212402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.382554292678833, + "rewards_train/margins": 2.1968510150909424, + "rewards_train/rejected": -3.5794053077697754, + "step": 1721 + }, + { + "epoch": 0.48, + "learning_rate": 4.84426601060799e-07, + "loss": 0.494, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -140.5355224609375, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -43.12151336669922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8535523414611816, + "rewards_train/margins": -2.391400992870331, + "rewards_train/rejected": -0.46215134859085083, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -8.604208946228027, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -25.210880279541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6557334065437317, + "rewards_train/margins": 1.1122296452522278, + "rewards_train/rejected": -1.7679630517959595, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -160.68106079101562, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -182.3779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.1681060791015625, + "rewards_train/margins": 1.3196868896484375, + "rewards_train/rejected": -6.48779296875, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -31.13842010498047, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -66.44818878173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.395092010498047, + "rewards_train/margins": 1.7872271537780762, + "rewards_train/rejected": -4.182319164276123, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -146.63644409179688, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -171.0460662841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.663644313812256, + "rewards_train/margins": 0.8909626007080078, + "rewards_train/rejected": -5.554606914520264, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -30.607240676879883, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -52.615997314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7232241034507751, + "rewards_train/margins": 3.9852505326271057, + "rewards_train/rejected": -4.708474636077881, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -115.01893615722656, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -174.9044647216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.351893663406372, + "rewards_train/margins": 3.4385530948638916, + "rewards_train/rejected": -5.790446758270264, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -130.71583557128906, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -186.36587524414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8715837001800537, + "rewards_train/margins": 4.665004014968872, + "rewards_train/rejected": -7.536587715148926, + "step": 1722 + }, + { + "epoch": 0.48, + "logps_train/chosen": -44.14241409301758, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -46.97246551513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7392414808273315, + "rewards_train/margins": 1.983005166053772, + "rewards_train/rejected": -3.7222466468811035, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -60.03528594970703, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -47.346885681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9535285830497742, + "rewards_train/margins": 2.48115998506546, + "rewards_train/rejected": -3.4346885681152344, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -79.93486785888672, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -83.62689971923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8184868097305298, + "rewards_train/margins": 0.56920325756073, + "rewards_train/rejected": -2.3876900672912598, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -86.7156982421875, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -59.789527893066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.121569871902466, + "rewards_train/margins": 2.994882822036743, + "rewards_train/rejected": -5.116452693939209, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -9.019831657409668, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -0.66015625, + "logps_train/rejected": -13.148439407348633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7058894038200378, + "rewards_train/margins": 0.5429388880729675, + "rewards_train/rejected": -1.2488282918930054, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -46.81918716430664, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -26.739259719848633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.09441876411438, + "rewards_train/margins": -0.4517427682876587, + "rewards_train/rejected": -1.6426759958267212, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -40.69297790527344, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -28.214658737182617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7692978382110596, + "rewards_train/margins": 0.21466803550720215, + "rewards_train/rejected": -1.9839658737182617, + "step": 1723 + }, + { + "epoch": 0.48, + "logps_train/chosen": -44.739933013916016, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -123.76570892333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1989933252334595, + "rewards_train/margins": 4.477577567100525, + "rewards_train/rejected": -5.676570892333984, + "step": 1723 + }, + { + "epoch": 0.48, + "learning_rate": 4.821615790793495e-07, + "loss": 0.3919, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -4.390580654144287, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -42.12864303588867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11718306690454483, + "rewards_train/margins": 3.051931284368038, + "rewards_train/rejected": -3.169114351272583, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -4.370794773101807, + "logps_train/ref_chosen": -0.90625, + "logps_train/ref_rejected": -0.90625, + "logps_train/rejected": -4.42556095123291, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3464544713497162, + "rewards_train/margins": 0.005476623773574829, + "rewards_train/rejected": -0.351931095123291, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -81.94168090820312, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -111.04682922363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8691681623458862, + "rewards_train/margins": 1.8355149030685425, + "rewards_train/rejected": -3.7046830654144287, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -164.8881378173828, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -148.99114990234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.488813877105713, + "rewards_train/margins": -2.489698886871338, + "rewards_train/rejected": -2.999114990234375, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -63.982666015625, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -125.65548706054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05173340067267418, + "rewards_train/margins": 3.617282059043646, + "rewards_train/rejected": -3.5655486583709717, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -124.7244873046875, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -176.18365478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.272448778152466, + "rewards_train/margins": 5.1959168910980225, + "rewards_train/rejected": -7.468365669250488, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -94.23033905029297, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -168.65121459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2230339050292969, + "rewards_train/margins": 6.7420878410339355, + "rewards_train/rejected": -7.965121746063232, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -34.180179595947266, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -49.112449645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5680179595947266, + "rewards_train/margins": 0.7682269811630249, + "rewards_train/rejected": -1.3362449407577515, + "step": 1724 + }, + { + "epoch": 0.48, + "logps_train/chosen": -121.63704681396484, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -159.6609649658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5637047290802002, + "rewards_train/margins": 2.6023919582366943, + "rewards_train/rejected": -4.1660966873168945, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -89.9248275756836, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -277.29345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.092482805252075, + "rewards_train/margins": 7.9368627071380615, + "rewards_train/rejected": -10.029345512390137, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -138.4881591796875, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -218.97015380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.398815870285034, + "rewards_train/margins": 3.3981997966766357, + "rewards_train/rejected": -6.79701566696167, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -120.25459289550781, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -194.8113250732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9754592776298523, + "rewards_train/margins": 3.605673134326935, + "rewards_train/rejected": -4.581132411956787, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -141.71255493164062, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -195.75079345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.328744500875473, + "rewards_train/margins": 7.253823846578598, + "rewards_train/rejected": -6.925079345703125, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -163.96731567382812, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -204.57508850097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.996731758117676, + "rewards_train/margins": 0.5607771873474121, + "rewards_train/rejected": -5.557508945465088, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -11.975308418273926, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -45.563514709472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5694058537483215, + "rewards_train/margins": 2.4869455695152283, + "rewards_train/rejected": -3.05635142326355, + "step": 1725 + }, + { + "epoch": 0.48, + "logps_train/chosen": -20.757240295410156, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -84.800537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15072403848171234, + "rewards_train/margins": 4.054329767823219, + "rewards_train/rejected": -4.205053806304932, + "step": 1725 + }, + { + "epoch": 0.48, + "learning_rate": 4.799001814178134e-07, + "loss": 0.2843, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -34.380210876464844, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -57.14635467529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2255210876464844, + "rewards_train/margins": 1.23911452293396, + "rewards_train/rejected": -2.4646356105804443, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -113.84312438964844, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -133.58053588867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3343124389648438, + "rewards_train/margins": 3.173741340637207, + "rewards_train/rejected": -4.508053779602051, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -21.988510131835938, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -15.205945014953613, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7301010489463806, + "rewards_train/margins": 0.2342434525489807, + "rewards_train/rejected": -0.9643445014953613, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -0.49787595868110657, + "logps_train/ref_chosen": -0.3515625, + "logps_train/ref_rejected": -0.3515625, + "logps_train/rejected": -0.508065402507782, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.014631345868110657, + "rewards_train/margins": 0.0010189451277256012, + "rewards_train/rejected": -0.015650290995836258, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -97.9793472290039, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -161.5417938232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.372934818267822, + "rewards_train/margins": 1.3812446594238281, + "rewards_train/rejected": -5.75417947769165, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.600685119628906, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -11.162813186645508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3944435119628906, + "rewards_train/margins": 0.25621283054351807, + "rewards_train/rejected": -0.6506563425064087, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -21.987457275390625, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -41.84324645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1737457513809204, + "rewards_train/margins": 2.260578989982605, + "rewards_train/rejected": -3.4343247413635254, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -17.47400665283203, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -31.543285369873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1849006414413452, + "rewards_train/margins": 0.45067787170410156, + "rewards_train/rejected": -1.6355785131454468, + "step": 1726 + }, + { + "epoch": 0.48, + "logps_train/chosen": -178.1273651123047, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -154.43316650390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.812736511230469, + "rewards_train/margins": -0.06941986083984375, + "rewards_train/rejected": -4.743316650390625, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -27.002946853637695, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -64.41641235351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1377947330474854, + "rewards_train/margins": 0.8788466453552246, + "rewards_train/rejected": -2.01664137840271, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -42.15756607055664, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -121.25798034667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.240756630897522, + "rewards_train/margins": 2.3350414037704468, + "rewards_train/rejected": -3.5757980346679688, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -187.16195678710938, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -169.0210723876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9161956310272217, + "rewards_train/margins": 1.5359117984771729, + "rewards_train/rejected": -4.4521074295043945, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -109.8106460571289, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -165.27093505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8810645937919617, + "rewards_train/margins": 4.146028816699982, + "rewards_train/rejected": -5.027093410491943, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -70.92108154296875, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -97.24072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.642108142375946, + "rewards_train/margins": 2.9819641709327698, + "rewards_train/rejected": -3.624072313308716, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.089402198791504, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -60.10020065307617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7683152556419373, + "rewards_train/margins": 2.0167048573493958, + "rewards_train/rejected": -2.785020112991333, + "step": 1727 + }, + { + "epoch": 0.48, + "logps_train/chosen": -15.669382095336914, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -9.699911117553711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9763132333755493, + "rewards_train/margins": -0.40319710969924927, + "rewards_train/rejected": -0.5731161236763, + "step": 1727 + }, + { + "epoch": 0.48, + "learning_rate": 4.776424239035759e-07, + "loss": 0.3393, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -118.35648345947266, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -181.71246337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6856483221054077, + "rewards_train/margins": 1.385598063468933, + "rewards_train/rejected": -3.071246385574341, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -78.05699157714844, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -180.351806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.49430084228515625, + "rewards_train/margins": 4.429481506347656, + "rewards_train/rejected": -3.9351806640625, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -200.81910705566406, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -184.5537872314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9819107055664062, + "rewards_train/margins": 1.1734681129455566, + "rewards_train/rejected": -4.155378818511963, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -87.10154724121094, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -117.12226867675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2101547718048096, + "rewards_train/margins": 2.352072238922119, + "rewards_train/rejected": -3.5622270107269287, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -32.76808166503906, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -23.639434814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9768081903457642, + "rewards_train/margins": 0.05744779109954834, + "rewards_train/rejected": -2.0342559814453125, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -182.7872314453125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -212.52102661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.578723430633545, + "rewards_train/margins": 0.47337913513183594, + "rewards_train/rejected": -5.052102565765381, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -195.56959533691406, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -228.3576202392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.556960105895996, + "rewards_train/margins": 1.928802490234375, + "rewards_train/rejected": -12.485762596130371, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -10.608416557312012, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -3.375, + "logps_train/rejected": -12.221920013427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6936541795730591, + "rewards_train/margins": 0.1910378336906433, + "rewards_train/rejected": -0.8846920132637024, + "step": 1728 + }, + { + "epoch": 0.48, + "logps_train/chosen": -202.71188354492188, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -235.51791381835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.971188545227051, + "rewards_train/margins": 3.18060302734375, + "rewards_train/rejected": -9.1517915725708, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -24.90534210205078, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -22.245628356933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1842842102050781, + "rewards_train/margins": -0.3972213864326477, + "rewards_train/rejected": -0.7870628237724304, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -104.93148040771484, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -42.4205207824707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2431480437517166, + "rewards_train/margins": 0.44890402257442474, + "rewards_train/rejected": -0.6920520663261414, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -14.463809967041016, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -3.0, + "logps_train/rejected": -28.69483184814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24013100564479828, + "rewards_train/margins": 2.3293522745370865, + "rewards_train/rejected": -2.5694832801818848, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -163.61988830566406, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -189.50794982910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8619887828826904, + "rewards_train/margins": 2.788806200027466, + "rewards_train/rejected": -5.650794982910156, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -35.06226348876953, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -33.140865325927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7562263607978821, + "rewards_train/margins": 1.6516103148460388, + "rewards_train/rejected": -2.407836675643921, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -240.1632080078125, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -214.88856506347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.716320991516113, + "rewards_train/margins": 0.4725360870361328, + "rewards_train/rejected": -8.188857078552246, + "step": 1729 + }, + { + "epoch": 0.48, + "logps_train/chosen": -179.0581817626953, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -190.704833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.0058183670043945, + "rewards_train/margins": 0.8646650314331055, + "rewards_train/rejected": -5.8704833984375, + "step": 1729 + }, + { + "epoch": 0.48, + "learning_rate": 4.7538832233854665e-07, + "loss": 0.3182, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -156.64114379882812, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -218.9132080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.714114665985107, + "rewards_train/margins": 2.7772059440612793, + "rewards_train/rejected": -8.491320610046387, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -13.154291152954102, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -10.831663131713867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3279291093349457, + "rewards_train/margins": -0.2697627954185009, + "rewards_train/rejected": -0.05816631391644478, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -127.79217529296875, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -230.8621826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.129217505455017, + "rewards_train/margins": 6.357000946998596, + "rewards_train/rejected": -7.486218452453613, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -90.32845306396484, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -147.36334228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38284531235694885, + "rewards_train/margins": 7.253489106893539, + "rewards_train/rejected": -7.636334419250488, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -35.295509338378906, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -36.427581787109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5045509338378906, + "rewards_train/margins": 0.794457197189331, + "rewards_train/rejected": -2.2990081310272217, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -53.91322326660156, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -84.14613342285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6288223266601562, + "rewards_train/margins": -2.214208960533142, + "rewards_train/rejected": -1.4146133661270142, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -15.653717041015625, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -45.29924011230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9278717041015625, + "rewards_train/margins": 2.1145522594451904, + "rewards_train/rejected": -3.042423963546753, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -40.994754791259766, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -91.32112884521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.224475622177124, + "rewards_train/margins": 1.9326374530792236, + "rewards_train/rejected": -4.157113075256348, + "step": 1730 + }, + { + "epoch": 0.48, + "logps_train/chosen": -129.82968139648438, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -164.65382385253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0829681158065796, + "rewards_train/margins": 4.13241446018219, + "rewards_train/rejected": -5.2153825759887695, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -7.496090888977051, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -18.631671905517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4277341067790985, + "rewards_train/margins": 0.9198080599308014, + "rewards_train/rejected": -1.3475421667099, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -132.0386505126953, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -187.26528930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.353865146636963, + "rewards_train/margins": 4.372663974761963, + "rewards_train/rejected": -7.726529121398926, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -130.04165649414062, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -160.4309844970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1041656732559204, + "rewards_train/margins": 3.1389330625534058, + "rewards_train/rejected": -4.243098735809326, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -62.45458221435547, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -158.20968627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4704582393169403, + "rewards_train/margins": 5.500510483980179, + "rewards_train/rejected": -5.970968723297119, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -19.274093627929688, + "logps_train/ref_chosen": -0.375, + "logps_train/ref_rejected": -0.375, + "logps_train/rejected": -19.02752685546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8899093866348267, + "rewards_train/margins": -0.02465665340423584, + "rewards_train/rejected": -1.8652527332305908, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -191.90396118164062, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -241.11407470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.8903961181640625, + "rewards_train/margins": 4.7710113525390625, + "rewards_train/rejected": -11.661407470703125, + "step": 1731 + }, + { + "epoch": 0.48, + "logps_train/chosen": -2.5447607040405273, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -15.148981094360352, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11896143108606339, + "rewards_train/margins": 0.8026095405220985, + "rewards_train/rejected": -0.6836481094360352, + "step": 1731 + }, + { + "epoch": 0.48, + "learning_rate": 4.731378924990458e-07, + "loss": 0.3341, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -7.366960525512695, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -19.067663192749023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3382585644721985, + "rewards_train/margins": 0.6997577548027039, + "rewards_train/rejected": -1.0380163192749023, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -62.33037567138672, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -26.988855361938477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8330375552177429, + "rewards_train/margins": 1.175222933292389, + "rewards_train/rejected": -2.008260488510132, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -99.31243896484375, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -92.09397888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8812439441680908, + "rewards_train/margins": 0.2781538963317871, + "rewards_train/rejected": -2.159397840499878, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -210.1828155517578, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -228.8563690185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.51828145980835, + "rewards_train/margins": 0.7673554420471191, + "rewards_train/rejected": -7.285636901855469, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -151.06259155273438, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -162.59085083007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.206259250640869, + "rewards_train/margins": 2.8528261184692383, + "rewards_train/rejected": -5.059085369110107, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -4.786340713500977, + "logps_train/ref_chosen": -0.3828125, + "logps_train/ref_rejected": -0.3828125, + "logps_train/rejected": -4.744643211364746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.44035282731056213, + "rewards_train/margins": -0.004169762134552002, + "rewards_train/rejected": -0.43618306517601013, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -91.18510437011719, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -120.83021545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8685104846954346, + "rewards_train/margins": 1.2645111083984375, + "rewards_train/rejected": -3.133021593093872, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -65.47197723388672, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -171.579345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3471977412700653, + "rewards_train/margins": 6.660736829042435, + "rewards_train/rejected": -7.0079345703125, + "step": 1732 + }, + { + "epoch": 0.48, + "logps_train/chosen": -34.70999526977539, + "logps_train/ref_chosen": -28.5, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -19.969566345214844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6209995150566101, + "rewards_train/margins": -0.07404285669326782, + "rewards_train/rejected": -0.5469566583633423, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -29.647586822509766, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -50.06648254394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6710087060928345, + "rewards_train/margins": 1.7356396913528442, + "rewards_train/rejected": -3.4066483974456787, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -111.97166442871094, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -168.2479705810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7471665143966675, + "rewards_train/margins": 2.6776305437088013, + "rewards_train/rejected": -4.424797058105469, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -137.0249481201172, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -204.77899169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.302494764328003, + "rewards_train/margins": 1.775404691696167, + "rewards_train/rejected": -4.07789945602417, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -63.79634094238281, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -28.42423439025879, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.004634141921997, + "rewards_train/margins": -1.4122107028961182, + "rewards_train/rejected": -1.592423439025879, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -13.520113945007324, + "logps_train/ref_chosen": -1.71875, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -11.406377792358398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1801364421844482, + "rewards_train/margins": -0.45199865102767944, + "rewards_train/rejected": -0.7281377911567688, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -122.12155151367188, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -112.18370056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.512155294418335, + "rewards_train/margins": 1.5062148571014404, + "rewards_train/rejected": -4.018370151519775, + "step": 1733 + }, + { + "epoch": 0.48, + "logps_train/chosen": -127.02366638183594, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -192.37863159179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.102366924285889, + "rewards_train/margins": 3.6854963302612305, + "rewards_train/rejected": -7.787863254547119, + "step": 1733 + }, + { + "epoch": 0.48, + "learning_rate": 4.7089115013569716e-07, + "loss": 0.4085, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -121.05348205566406, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -177.544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6053482294082642, + "rewards_train/margins": 2.04914391040802, + "rewards_train/rejected": -3.654492139816284, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -27.20771026611328, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -50.011600494384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3020210266113281, + "rewards_train/margins": 2.8616390228271484, + "rewards_train/rejected": -4.163660049438477, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -127.59203338623047, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -128.85081481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.159203290939331, + "rewards_train/margins": 1.4758784770965576, + "rewards_train/rejected": -4.635081768035889, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -105.09028625488281, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -155.37347412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.609028697013855, + "rewards_train/margins": 6.278319001197815, + "rewards_train/rejected": -7.88734769821167, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -34.49154281616211, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -37.410804748535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.324154257774353, + "rewards_train/margins": 1.7731763124465942, + "rewards_train/rejected": -3.0973305702209473, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -24.17940902709961, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -42.492530822753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0116909742355347, + "rewards_train/margins": 1.718812108039856, + "rewards_train/rejected": -2.7305030822753906, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -28.14832305908203, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -29.48443603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.73983234167099, + "rewards_train/margins": 1.7398613095283508, + "rewards_train/rejected": -2.479693651199341, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -52.788516998291016, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -91.19320678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2788517475128174, + "rewards_train/margins": 0.9904689788818359, + "rewards_train/rejected": -2.2693207263946533, + "step": 1734 + }, + { + "epoch": 0.48, + "logps_train/chosen": -17.032718658447266, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -27.07267189025879, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2407718896865845, + "rewards_train/margins": -0.5460047125816345, + "rewards_train/rejected": -0.69476717710495, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -29.57841682434082, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -90.75843811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8703417181968689, + "rewards_train/margins": 3.6555023789405823, + "rewards_train/rejected": -4.525844097137451, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -20.94487762451172, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -39.01755142211914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7319877743721008, + "rewards_train/margins": 2.113517463207245, + "rewards_train/rejected": -2.8455052375793457, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -38.33494186401367, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -18.725526809692383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.620994210243225, + "rewards_train/margins": -0.6671915054321289, + "rewards_train/rejected": -0.9538027048110962, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -101.263671875, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -35.351009368896484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.326367139816284, + "rewards_train/margins": -0.6537661552429199, + "rewards_train/rejected": -1.6726009845733643, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -43.18925857543945, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -34.40253829956055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6439259052276611, + "rewards_train/margins": -0.003672003746032715, + "rewards_train/rejected": -1.6402539014816284, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -9.475120544433594, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -16.619754791259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5193870663642883, + "rewards_train/margins": -0.2199115753173828, + "rewards_train/rejected": -0.2994754910469055, + "step": 1735 + }, + { + "epoch": 0.48, + "logps_train/chosen": -180.2895050048828, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -232.0196075439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.628950595855713, + "rewards_train/margins": 0.4730103015899658, + "rewards_train/rejected": -3.1019608974456787, + "step": 1735 + }, + { + "epoch": 0.49, + "learning_rate": 4.686481109733146e-07, + "loss": 0.404, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -28.09088897705078, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -93.28560638427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6403388977050781, + "rewards_train/margins": 0.08822178840637207, + "rewards_train/rejected": -1.7285606861114502, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -29.430795669555664, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -34.35809326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.680579662322998, + "rewards_train/margins": 0.4521048069000244, + "rewards_train/rejected": -3.1326844692230225, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -50.806026458740234, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -78.88436889648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2443973571062088, + "rewards_train/margins": 0.9828342348337173, + "rewards_train/rejected": -0.7384368777275085, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -130.62730407714844, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -147.33370971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.812730550765991, + "rewards_train/margins": 1.9706404209136963, + "rewards_train/rejected": -4.7833709716796875, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -33.06997299194336, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -21.591663360595703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0319974422454834, + "rewards_train/margins": -0.2790811061859131, + "rewards_train/rejected": -1.7529163360595703, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -159.69813537597656, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -196.2133331298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.869813442230225, + "rewards_train/margins": 1.2515201568603516, + "rewards_train/rejected": -7.121333599090576, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -8.09485149383545, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -9.008503913879395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19073514640331268, + "rewards_train/margins": 0.00074024498462677, + "rewards_train/rejected": -0.19147539138793945, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -95.064697265625, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -191.5586395263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7064697742462158, + "rewards_train/margins": 5.949394464492798, + "rewards_train/rejected": -7.655864238739014, + "step": 1736 + }, + { + "epoch": 0.49, + "logps_train/chosen": -132.32728576660156, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -248.90679931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.4327287673950195, + "rewards_train/margins": 8.407951354980469, + "rewards_train/rejected": -13.840680122375488, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -191.48974609375, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -183.99911499023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.298974514007568, + "rewards_train/margins": -0.3990631103515625, + "rewards_train/rejected": -6.899911403656006, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -67.9561767578125, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -70.3410873413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.82061767578125, + "rewards_train/margins": 2.325991153717041, + "rewards_train/rejected": -4.146608829498291, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -11.383912086486816, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -18.45819091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2991088032722473, + "rewards_train/margins": 0.31992789544165134, + "rewards_train/rejected": -0.02081909216940403, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -91.72576904296875, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -95.337158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.272576928138733, + "rewards_train/margins": 0.5611388683319092, + "rewards_train/rejected": -1.833715796470642, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -13.588918685913086, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -20.435928344726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7182669043540955, + "rewards_train/margins": 0.8940759301185608, + "rewards_train/rejected": -1.6123428344726562, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -96.8359603881836, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -117.98329162597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7335960268974304, + "rewards_train/margins": 3.1647332310676575, + "rewards_train/rejected": -3.898329257965088, + "step": 1737 + }, + { + "epoch": 0.49, + "logps_train/chosen": -15.686962127685547, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -25.116363525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2811962068080902, + "rewards_train/margins": 1.842940241098404, + "rewards_train/rejected": -2.124136447906494, + "step": 1737 + }, + { + "epoch": 0.49, + "learning_rate": 4.6640879071079395e-07, + "loss": 0.3702, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -7.728877544403076, + "logps_train/ref_chosen": -1.09375, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -29.552452087402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6635127663612366, + "rewards_train/margins": 0.2792324423789978, + "rewards_train/rejected": -0.9427452087402344, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -27.31627655029297, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -32.309593200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39412766695022583, + "rewards_train/margins": 1.9055816531181335, + "rewards_train/rejected": -2.2997093200683594, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -12.59950065612793, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -7.919445991516113, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9365125894546509, + "rewards_train/margins": -0.6008179783821106, + "rewards_train/rejected": -0.3356946110725403, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -31.958219528198242, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -55.24507141113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8458219766616821, + "rewards_train/margins": 2.841185212135315, + "rewards_train/rejected": -3.687007188796997, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -70.3622055053711, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -196.28659057617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.261220693588257, + "rewards_train/margins": 6.4674389362335205, + "rewards_train/rejected": -8.728659629821777, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -249.5935516357422, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -295.47796630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.959355354309082, + "rewards_train/margins": 1.5884418487548828, + "rewards_train/rejected": -11.547797203063965, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -149.890625, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -199.0436248779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.2890625, + "rewards_train/margins": 4.415300369262695, + "rewards_train/rejected": -9.704362869262695, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -55.076778411865234, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -53.362361907958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.732677936553955, + "rewards_train/margins": 0.5410583019256592, + "rewards_train/rejected": -3.2737362384796143, + "step": 1738 + }, + { + "epoch": 0.49, + "logps_train/chosen": -194.9195556640625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -198.64151000976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.94195556640625, + "rewards_train/margins": 0.3721952438354492, + "rewards_train/rejected": -8.3141508102417, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -31.7130069732666, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -48.09098815917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.790050745010376, + "rewards_train/margins": 1.094048023223877, + "rewards_train/rejected": -2.884098768234253, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -86.05411529541016, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -97.61885833740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6554116010665894, + "rewards_train/margins": 0.35647428035736084, + "rewards_train/rejected": -2.01188588142395, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -130.99557495117188, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -152.95108032226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6995574831962585, + "rewards_train/margins": 1.4955506920814514, + "rewards_train/rejected": -2.19510817527771, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -6.978725910186768, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -2.0625, + "logps_train/rejected": -3.8538520336151123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2916226089000702, + "rewards_train/margins": -0.11248740553855896, + "rewards_train/rejected": -0.17913520336151123, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -91.0027847290039, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -143.2292022705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7502784729003906, + "rewards_train/margins": 4.472641944885254, + "rewards_train/rejected": -6.2229204177856445, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -37.464317321777344, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -51.881591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.027681827545166, + "rewards_train/margins": 1.222977638244629, + "rewards_train/rejected": -4.250659465789795, + "step": 1739 + }, + { + "epoch": 0.49, + "logps_train/chosen": -152.32496643066406, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -175.68995666503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.732496738433838, + "rewards_train/margins": 2.0364990234375, + "rewards_train/rejected": -5.768995761871338, + "step": 1739 + }, + { + "epoch": 0.49, + "learning_rate": 4.641732050210031e-07, + "loss": 0.3214, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -56.23534393310547, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -59.108184814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22353439033031464, + "rewards_train/margins": 0.3622840791940689, + "rewards_train/rejected": -0.5858184695243835, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -181.50454711914062, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -211.96737670898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.050454616546631, + "rewards_train/margins": 0.7462830543518066, + "rewards_train/rejected": -5.7967376708984375, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -93.92758178710938, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -103.89967346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39275819063186646, + "rewards_train/margins": 1.8472091555595398, + "rewards_train/rejected": -2.2399673461914062, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -16.50424575805664, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -21.0681209564209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.506674587726593, + "rewards_train/margins": 1.236074984073639, + "rewards_train/rejected": -1.742749571800232, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -22.90185546875, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -38.25053024291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20268554985523224, + "rewards_train/margins": 2.1723676174879074, + "rewards_train/rejected": -2.3750531673431396, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -106.06172943115234, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -131.97142028808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3061729371547699, + "rewards_train/margins": 5.3909691870212555, + "rewards_train/rejected": -5.697142124176025, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -27.009521484375, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -47.4976921081543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0384521484375, + "rewards_train/margins": 2.9613170623779297, + "rewards_train/rejected": -3.9997692108154297, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -197.07464599609375, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -171.02154541015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.357464790344238, + "rewards_train/margins": -1.3053102493286133, + "rewards_train/rejected": -6.052154541015625, + "step": 1740 + }, + { + "epoch": 0.49, + "logps_train/chosen": -22.21051025390625, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -13.929744720458984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6054260730743408, + "rewards_train/margins": -0.8187015652656555, + "rewards_train/rejected": -0.7867245078086853, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -1.5747106075286865, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -109.03076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024560188874602318, + "rewards_train/margins": 1.4276363607496023, + "rewards_train/rejected": -1.403076171875, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -31.73263168334961, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -23.203330993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.710763156414032, + "rewards_train/margins": 1.0470699667930603, + "rewards_train/rejected": -1.7578331232070923, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -196.85232543945312, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -173.57339477539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.085232734680176, + "rewards_train/margins": -0.52789306640625, + "rewards_train/rejected": -5.557339668273926, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -21.89781379699707, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -36.96702194213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.946031391620636, + "rewards_train/margins": 2.313170850276947, + "rewards_train/rejected": -3.259202241897583, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -60.365936279296875, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -113.78174591064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9615936279296875, + "rewards_train/margins": 3.7665810585021973, + "rewards_train/rejected": -4.728174686431885, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -35.80429458618164, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -42.65544128417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.030429482460022, + "rewards_train/margins": 2.2038646936416626, + "rewards_train/rejected": -3.2342941761016846, + "step": 1741 + }, + { + "epoch": 0.49, + "logps_train/chosen": -16.869718551635742, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -29.076671600341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4479093551635742, + "rewards_train/margins": 0.9535079002380371, + "rewards_train/rejected": -2.4014172554016113, + "step": 1741 + }, + { + "epoch": 0.49, + "learning_rate": 4.6194136955067206e-07, + "loss": 0.3913, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -87.06534576416016, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -115.28941345214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7065345644950867, + "rewards_train/margins": 1.572406828403473, + "rewards_train/rejected": -2.2789413928985596, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -21.990283966064453, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -36.402000427246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2927783727645874, + "rewards_train/margins": 0.23492169380187988, + "rewards_train/rejected": -1.5277000665664673, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -166.05722045898438, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -170.3411407470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0057220458984375, + "rewards_train/margins": 1.4783921241760254, + "rewards_train/rejected": -4.484114170074463, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -3.6570634841918945, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -44.4669075012207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024918651208281517, + "rewards_train/margins": 3.4966095443814993, + "rewards_train/rejected": -3.4716908931732178, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -151.34877014160156, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -166.646240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.934876918792725, + "rewards_train/margins": -0.27025270462036133, + "rewards_train/rejected": -4.664624214172363, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -92.76368713378906, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -107.35972595214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2763687074184418, + "rewards_train/margins": 0.1096038818359375, + "rewards_train/rejected": -0.3859725892543793, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -4.273125648498535, + "logps_train/ref_chosen": -1.921875, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -6.497241020202637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23512506484985352, + "rewards_train/margins": 0.16772404313087463, + "rewards_train/rejected": -0.40284910798072815, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -193.01663208007812, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -229.661865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.60166335105896, + "rewards_train/margins": 5.964523553848267, + "rewards_train/rejected": -9.566186904907227, + "step": 1742 + }, + { + "epoch": 0.49, + "logps_train/chosen": -105.1595687866211, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -208.71295166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.115956783294678, + "rewards_train/margins": 3.3553385734558105, + "rewards_train/rejected": -7.471295356750488, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -13.55981159210205, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -27.12188720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.012768841348588467, + "rewards_train/margins": 0.22495756205171347, + "rewards_train/rejected": -0.212188720703125, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -11.973255157470703, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -41.15224075317383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9332630038261414, + "rewards_train/margins": 2.7194610238075256, + "rewards_train/rejected": -3.652724027633667, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -52.42022705078125, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -81.49688720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.417022705078125, + "rewards_train/margins": 1.1826660633087158, + "rewards_train/rejected": -1.5996887683868408, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -16.170326232910156, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -25.499755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2873451709747314, + "rewards_train/margins": 0.6438804864883423, + "rewards_train/rejected": -1.9312256574630737, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -99.36593627929688, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -217.6148681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.786593914031982, + "rewards_train/margins": 5.474893093109131, + "rewards_train/rejected": -10.261487007141113, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -17.09958267211914, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -24.22756576538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.672458291053772, + "rewards_train/margins": 0.953423261642456, + "rewards_train/rejected": -1.625881552696228, + "step": 1743 + }, + { + "epoch": 0.49, + "logps_train/chosen": -80.55950927734375, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -96.47370147705078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20595093071460724, + "rewards_train/margins": -0.3585807830095291, + "rewards_train/rejected": 0.15262985229492188, + "step": 1743 + }, + { + "epoch": 0.49, + "learning_rate": 4.5971329992028227e-07, + "loss": 0.3558, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -141.3810577392578, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -171.3453369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5881057977676392, + "rewards_train/margins": 1.2464278936386108, + "rewards_train/rejected": -2.83453369140625, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -107.89237976074219, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -126.25422668457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8392379879951477, + "rewards_train/margins": 0.8361846804618835, + "rewards_train/rejected": -1.6754226684570312, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -36.7075080871582, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -37.664337158203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3520009517669678, + "rewards_train/margins": -0.04181718826293945, + "rewards_train/rejected": -2.3101837635040283, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -11.548707962036133, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -16.363529205322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27987080812454224, + "rewards_train/margins": 0.6471071243286133, + "rewards_train/rejected": -0.9269779324531555, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -90.75132751464844, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -39.25321960449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7751327753067017, + "rewards_train/margins": 0.8689392805099487, + "rewards_train/rejected": -2.6440720558166504, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -124.34873962402344, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -156.19680786132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.384874105453491, + "rewards_train/margins": 1.6848065853118896, + "rewards_train/rejected": -4.069680690765381, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -180.85049438476562, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -202.93675231933594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.485049724578857, + "rewards_train/margins": -0.29137420654296875, + "rewards_train/rejected": -6.193675518035889, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -212.1329345703125, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -251.56430053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.413293361663818, + "rewards_train/margins": 3.443136692047119, + "rewards_train/rejected": -7.8564300537109375, + "step": 1744 + }, + { + "epoch": 0.49, + "logps_train/chosen": -9.537984848022461, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -21.095361709594727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.033701516687870026, + "rewards_train/margins": 0.8244877234101295, + "rewards_train/rejected": -0.7907862067222595, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -82.45491027832031, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -148.58274841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0954910516738892, + "rewards_train/margins": 4.212783694267273, + "rewards_train/rejected": -5.308274745941162, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -118.80152893066406, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -191.4954071044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.830152988433838, + "rewards_train/margins": 3.119387626647949, + "rewards_train/rejected": -5.949540615081787, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -147.83261108398438, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -129.10498046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5832611322402954, + "rewards_train/margins": -0.2227630615234375, + "rewards_train/rejected": -1.360498070716858, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -110.56893920898438, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -66.19328308105469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.056894063949585, + "rewards_train/margins": -1.5125657320022583, + "rewards_train/rejected": -1.5443283319473267, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -4.632715225219727, + "logps_train/ref_chosen": -1.5546875, + "logps_train/ref_rejected": -1.390625, + "logps_train/rejected": -12.08951473236084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3078027665615082, + "rewards_train/margins": 0.7620861828327179, + "rewards_train/rejected": -1.069888949394226, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -19.614044189453125, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -19.169429779052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5864044427871704, + "rewards_train/margins": 0.05241358280181885, + "rewards_train/rejected": -1.6388180255889893, + "step": 1745 + }, + { + "epoch": 0.49, + "logps_train/chosen": -85.5267562866211, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -144.43856811523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0026756287552416325, + "rewards_train/margins": 5.441181087400764, + "rewards_train/rejected": -5.443856716156006, + "step": 1745 + }, + { + "epoch": 0.49, + "learning_rate": 4.5748901172395913e-07, + "loss": 0.4466, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -124.12452697753906, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -201.3243865966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.812452793121338, + "rewards_train/margins": 3.919985771179199, + "rewards_train/rejected": -6.732438564300537, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -60.448974609375, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -60.544986724853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.219897508621216, + "rewards_train/margins": 0.009601116180419922, + "rewards_train/rejected": -3.2294986248016357, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -90.27130126953125, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -161.87701416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.127130150794983, + "rewards_train/margins": 7.110571265220642, + "rewards_train/rejected": -8.237701416015625, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -173.43597412109375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -31.82172393798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.693597316741943, + "rewards_train/margins": -3.2426748275756836, + "rewards_train/rejected": -2.4509224891662598, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -48.254356384277344, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -24.176589965820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.787935733795166, + "rewards_train/margins": -2.057776689529419, + "rewards_train/rejected": -1.730159044265747, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -85.46926879882812, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -112.94424438476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4469269514083862, + "rewards_train/margins": 0.44749748706817627, + "rewards_train/rejected": -1.8944244384765625, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -3.817934036254883, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -8.499838829040527, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0739809051156044, + "rewards_train/margins": 0.5603779777884483, + "rewards_train/rejected": -0.6343588829040527, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -178.23178100585938, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -211.0, + "logps_train/rejected": -273.3551025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.623178243637085, + "rewards_train/margins": 2.6123321056365967, + "rewards_train/rejected": -6.235510349273682, + "step": 1746 + }, + { + "epoch": 0.49, + "logps_train/chosen": -26.34023666381836, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -21.736656188964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9215236902236938, + "rewards_train/margins": -0.06035804748535156, + "rewards_train/rejected": -0.8611656427383423, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -2.832754135131836, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -2.6890084743499756, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.05515041574835777, + "rewards_train/margins": -0.050312068313360214, + "rewards_train/rejected": -0.004838347434997559, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -34.6904411315918, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -29.418655395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5565441846847534, + "rewards_train/margins": 0.554071307182312, + "rewards_train/rejected": -2.1106154918670654, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -102.537109375, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -127.33243560791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.253710985183716, + "rewards_train/margins": 2.0295326709747314, + "rewards_train/rejected": -5.283243656158447, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -14.111061096191406, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -30.570789337158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1798561811447144, + "rewards_train/margins": 1.449097752571106, + "rewards_train/rejected": -2.6289539337158203, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -143.1399383544922, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -222.09002685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3639938831329346, + "rewards_train/margins": 4.245008707046509, + "rewards_train/rejected": -7.609002590179443, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -46.48545837402344, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -65.4543685913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6735458374023438, + "rewards_train/margins": 0.046890974044799805, + "rewards_train/rejected": -2.7204368114471436, + "step": 1747 + }, + { + "epoch": 0.49, + "logps_train/chosen": -177.83595275878906, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -177.57090759277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.733595371246338, + "rewards_train/margins": 1.2734951972961426, + "rewards_train/rejected": -8.00709056854248, + "step": 1747 + }, + { + "epoch": 0.49, + "learning_rate": 4.5526852052936113e-07, + "loss": 0.6478, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -15.659492492675781, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -17.27085304260254, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0784492492675781, + "rewards_train/margins": -0.17636394500732422, + "rewards_train/rejected": -0.9020853042602539, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -104.27503967285156, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -124.85675048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5275039672851562, + "rewards_train/margins": 2.358171224594116, + "rewards_train/rejected": -3.8856751918792725, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -24.745685577392578, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -97.77141571044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7245686054229736, + "rewards_train/margins": 0.9525730609893799, + "rewards_train/rejected": -2.6771416664123535, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -10.286572456359863, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -28.338577270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5505322813987732, + "rewards_train/margins": 1.4520755410194397, + "rewards_train/rejected": -2.002607822418213, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -57.436004638671875, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -70.67415618896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6686004400253296, + "rewards_train/margins": 2.611315369606018, + "rewards_train/rejected": -4.279915809631348, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -15.80913257598877, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -59.773746490478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.077788233757019, + "rewards_train/margins": 3.099586606025696, + "rewards_train/rejected": -4.177374839782715, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -208.75633239746094, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -173.95126342773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.275633335113525, + "rewards_train/margins": -1.2305068969726562, + "rewards_train/rejected": -5.045126438140869, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -205.20932006835938, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -279.1044006347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.220932006835938, + "rewards_train/margins": 2.8895082473754883, + "rewards_train/rejected": -11.110440254211426, + "step": 1748 + }, + { + "epoch": 0.49, + "logps_train/chosen": -7.6225266456604, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -2.65625, + "logps_train/rejected": -12.081212997436523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34662768244743347, + "rewards_train/margins": 0.5958686172962189, + "rewards_train/rejected": -0.9424962997436523, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -123.61006164550781, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -188.45802307128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8110063076019287, + "rewards_train/margins": 3.384795904159546, + "rewards_train/rejected": -6.195802211761475, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -29.482004165649414, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -19.30838394165039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.085700511932373, + "rewards_train/margins": -0.5548621416091919, + "rewards_train/rejected": -1.5308383703231812, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -1.94497549533844, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -3.013289451599121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.046127449721097946, + "rewards_train/margins": 0.016206394881010056, + "rewards_train/rejected": 0.02992105484008789, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -154.88687133789062, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -147.5548858642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6886872053146362, + "rewards_train/margins": 1.7168015241622925, + "rewards_train/rejected": -3.4054887294769287, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -129.56520080566406, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -137.09349060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2065200805664062, + "rewards_train/margins": 2.902829170227051, + "rewards_train/rejected": -5.109349250793457, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -31.11613655090332, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -22.68054962158203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.992863655090332, + "rewards_train/margins": -0.3060586452484131, + "rewards_train/rejected": -1.686805009841919, + "step": 1749 + }, + { + "epoch": 0.49, + "logps_train/chosen": -159.50927734375, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -220.392822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.900927782058716, + "rewards_train/margins": 3.638354539871216, + "rewards_train/rejected": -7.539282321929932, + "step": 1749 + }, + { + "epoch": 0.49, + "learning_rate": 4.530518418775733e-07, + "loss": 0.396, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -6.034655570983887, + "logps_train/ref_chosen": -3.109375, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -7.03346061706543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29252806305885315, + "rewards_train/margins": 0.05456799268722534, + "rewards_train/rejected": -0.3470960557460785, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -28.870595932006836, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -60.3997802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6495596170425415, + "rewards_train/margins": 0.29041844606399536, + "rewards_train/rejected": -0.9399780631065369, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -221.89271545410156, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -213.0, + "logps_train/rejected": -276.0528564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8892714977264404, + "rewards_train/margins": 2.4160144329071045, + "rewards_train/rejected": -6.305285930633545, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -84.53384399414062, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -86.10943603515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.55338454246521, + "rewards_train/margins": -0.29244089126586914, + "rewards_train/rejected": -2.260943651199341, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -16.190570831298828, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -48.239376068115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0128071308135986, + "rewards_train/margins": 1.348630428314209, + "rewards_train/rejected": -2.3614375591278076, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -190.83175659179688, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -285.9823913574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.483175754547119, + "rewards_train/margins": 5.715063571929932, + "rewards_train/rejected": -11.19823932647705, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -61.450340270996094, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -45.91398620605469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0950340032577515, + "rewards_train/margins": -0.028635382652282715, + "rewards_train/rejected": -1.0663986206054688, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -25.543855667114258, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -51.25046157836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2356356382369995, + "rewards_train/margins": 2.0019105672836304, + "rewards_train/rejected": -3.23754620552063, + "step": 1750 + }, + { + "epoch": 0.49, + "logps_train/chosen": -115.71519470214844, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -73.47989654541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9715194702148438, + "rewards_train/margins": 0.3264702558517456, + "rewards_train/rejected": -1.2979897260665894, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -35.242984771728516, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -33.8105354309082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9117984771728516, + "rewards_train/margins": 0.3630051612854004, + "rewards_train/rejected": -2.274803638458252, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -80.53996276855469, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -131.67138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8539962768554688, + "rewards_train/margins": 2.0131423473358154, + "rewards_train/rejected": -3.867138624191284, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -2.0187442302703857, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -15.485479354858398, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0762505754828453, + "rewards_train/margins": -0.03770148754119873, + "rewards_train/rejected": 0.11395206302404404, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -21.26531982421875, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -14.736922264099121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0265320539474487, + "rewards_train/margins": 0.012785196304321289, + "rewards_train/rejected": -1.03931725025177, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -108.59588623046875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -173.16665649414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.859588623046875, + "rewards_train/margins": 4.657077312469482, + "rewards_train/rejected": -6.516665935516357, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -17.673229217529297, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -19.56175994873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2673228979110718, + "rewards_train/margins": 0.03885316848754883, + "rewards_train/rejected": -1.3061760663986206, + "step": 1751 + }, + { + "epoch": 0.49, + "logps_train/chosen": -18.496475219726562, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -65.87315368652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2683975398540497, + "rewards_train/margins": 3.5064177811145782, + "rewards_train/rejected": -3.774815320968628, + "step": 1751 + }, + { + "epoch": 0.49, + "learning_rate": 4.5083899128299506e-07, + "loss": 0.4086, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -111.7685775756836, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -144.60125732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023142242804169655, + "rewards_train/margins": 0.3832679931074381, + "rewards_train/rejected": -0.36012575030326843, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -55.004844665527344, + "logps_train/ref_chosen": -26.875, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -48.44255828857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8129844665527344, + "rewards_train/margins": 1.3093962669372559, + "rewards_train/rejected": -4.12238073348999, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -53.73768615722656, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -65.8905258178711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.773768663406372, + "rewards_train/margins": 0.3402838706970215, + "rewards_train/rejected": -2.1140525341033936, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -13.622802734375, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -1.8046875, + "logps_train/rejected": -23.404401779174805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.912280261516571, + "rewards_train/margins": 1.2476912140846252, + "rewards_train/rejected": -2.1599714756011963, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -9.927044868469238, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -32.7891731262207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07604551315307617, + "rewards_train/margins": 2.6237127780914307, + "rewards_train/rejected": -2.5476672649383545, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -175.32363891601562, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -178.2959747314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7323638796806335, + "rewards_train/margins": 2.9972336888313293, + "rewards_train/rejected": -3.729597568511963, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -80.32118225097656, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -92.98455047607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9821182489395142, + "rewards_train/margins": 0.31633687019348145, + "rewards_train/rejected": -1.2984551191329956, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -23.046689987182617, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -27.33806610107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4734190702438354, + "rewards_train/margins": 0.34163761138916016, + "rewards_train/rejected": -1.8150566816329956, + "step": 1752 + }, + { + "epoch": 0.49, + "logps_train/chosen": -27.905603408813477, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -1.828125, + "logps_train/rejected": -42.96279525756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0530604124069214, + "rewards_train/margins": 3.060406804084778, + "rewards_train/rejected": -4.113467216491699, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -18.757055282592773, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -129.24598693847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5819555521011353, + "rewards_train/margins": 0.6926432847976685, + "rewards_train/rejected": -2.2745988368988037, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -25.142902374267578, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -39.13384246826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9392902255058289, + "rewards_train/margins": 1.4803441166877747, + "rewards_train/rejected": -2.4196343421936035, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -99.66822814941406, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -109.52143859863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1168229579925537, + "rewards_train/margins": 5.310321092605591, + "rewards_train/rejected": -7.4271440505981445, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -52.9486083984375, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -137.77638244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03013916127383709, + "rewards_train/margins": 4.607777310535312, + "rewards_train/rejected": -4.577638149261475, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -94.90011596679688, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -120.40448760986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9900115728378296, + "rewards_train/margins": 4.550437092781067, + "rewards_train/rejected": -6.5404486656188965, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -140.77081298828125, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -167.14993286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.677081286907196, + "rewards_train/margins": 3.587911903858185, + "rewards_train/rejected": -4.264993190765381, + "step": 1753 + }, + { + "epoch": 0.49, + "logps_train/chosen": -139.82420349121094, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -205.62908935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.082420349121094, + "rewards_train/margins": 5.130488395690918, + "rewards_train/rejected": -9.212908744812012, + "step": 1753 + }, + { + "epoch": 0.49, + "learning_rate": 4.486299842332353e-07, + "loss": 0.2166, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -138.5552978515625, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -254.34866333007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5055298805236816, + "rewards_train/margins": 7.929337024688721, + "rewards_train/rejected": -10.434866905212402, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -49.48577880859375, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -51.3558464050293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.436077833175659, + "rewards_train/margins": 0.5120067596435547, + "rewards_train/rejected": -2.948084592819214, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -100.36651611328125, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -159.2772674560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1366517543792725, + "rewards_train/margins": 3.6410748958587646, + "rewards_train/rejected": -5.777726650238037, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -46.66725158691406, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -46.29227828979492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4667251706123352, + "rewards_train/margins": -0.03749734163284302, + "rewards_train/rejected": -0.4292278289794922, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -78.96715545654297, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -145.50921630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4717156887054443, + "rewards_train/margins": 2.429206132888794, + "rewards_train/rejected": -4.900921821594238, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -57.04601287841797, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -66.56539916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.517101287841797, + "rewards_train/margins": 1.6956887245178223, + "rewards_train/rejected": -5.212790012359619, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -90.16654205322266, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -220.14663696289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8166542053222656, + "rewards_train/margins": 10.648009300231934, + "rewards_train/rejected": -11.4646635055542, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -63.05524444580078, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -237.89968872070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2555244565010071, + "rewards_train/margins": 4.634444415569305, + "rewards_train/rejected": -4.8899688720703125, + "step": 1754 + }, + { + "epoch": 0.49, + "logps_train/chosen": -11.715893745422363, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -28.2825927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09716062992811203, + "rewards_train/margins": 1.406669907271862, + "rewards_train/rejected": -1.30950927734375, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -29.593042373657227, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -18.42925453186035, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0093042850494385, + "rewards_train/margins": 0.07424616813659668, + "rewards_train/rejected": -1.0835504531860352, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -127.72569274902344, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -223.76565551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6225693225860596, + "rewards_train/margins": 8.653996229171753, + "rewards_train/rejected": -11.276565551757812, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -21.662845611572266, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -21.690086364746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2787845730781555, + "rewards_train/margins": 0.00272408127784729, + "rewards_train/rejected": -0.2815086543560028, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -167.23721313476562, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -205.17755126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.123721599578857, + "rewards_train/margins": 0.6940336227416992, + "rewards_train/rejected": -4.817755222320557, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -2.9138224124908447, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -27.10893440246582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06481974571943283, + "rewards_train/margins": 1.6273236945271492, + "rewards_train/rejected": -1.692143440246582, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -236.79861450195312, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -238.74517822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.079861640930176, + "rewards_train/margins": 0.8946561813354492, + "rewards_train/rejected": -9.974517822265625, + "step": 1755 + }, + { + "epoch": 0.49, + "logps_train/chosen": -27.743839263916016, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -100.19841766357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.430634021759033, + "rewards_train/margins": 1.839207649230957, + "rewards_train/rejected": -4.26984167098999, + "step": 1755 + }, + { + "epoch": 0.49, + "learning_rate": 4.464248361890005e-07, + "loss": 0.2571, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -0.04064609110355377, + "logps_train/ref_chosen": -0.275390625, + "logps_train/ref_rejected": -1.5546875, + "logps_train/rejected": -3.1716129779815674, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023474453017115593, + "rewards_train/margins": 0.1851669978350401, + "rewards_train/rejected": -0.1616925448179245, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -138.13967895507812, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -129.3248748779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.213967800140381, + "rewards_train/margins": 0.7185196876525879, + "rewards_train/rejected": -4.932487487792969, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -77.20270538330078, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -89.96635437011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7702705264091492, + "rewards_train/margins": 2.1763649582862854, + "rewards_train/rejected": -2.9466354846954346, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -130.75299072265625, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -122.24414825439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.175299048423767, + "rewards_train/margins": 1.8491157293319702, + "rewards_train/rejected": -3.0244147777557373, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -58.544586181640625, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -102.45410919189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.454458713531494, + "rewards_train/margins": 3.240952491760254, + "rewards_train/rejected": -5.695411205291748, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -91.31355285644531, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -45.77642059326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1313552856445312, + "rewards_train/margins": 1.0212867259979248, + "rewards_train/rejected": -2.152642011642456, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -26.273839950561523, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -82.41034698486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30238398909568787, + "rewards_train/margins": 5.338650614023209, + "rewards_train/rejected": -5.6410346031188965, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -9.587542533874512, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -33.40509033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7181292772293091, + "rewards_train/margins": 1.2911297082901, + "rewards_train/rejected": -2.009258985519409, + "step": 1756 + }, + { + "epoch": 0.49, + "logps_train/chosen": -113.02006530761719, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -53.63867950439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.602006673812866, + "rewards_train/margins": 0.024361371994018555, + "rewards_train/rejected": -2.6263680458068848, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -19.63605499267578, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -24.490943908691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5745429992675781, + "rewards_train/margins": 0.38705146312713623, + "rewards_train/rejected": -1.9615944623947144, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -11.359724044799805, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -31.595195770263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6484724283218384, + "rewards_train/margins": 0.4860471487045288, + "rewards_train/rejected": -1.1345195770263672, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -101.47047424316406, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -213.44866943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.947047472000122, + "rewards_train/margins": 7.39781928062439, + "rewards_train/rejected": -11.344866752624512, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -37.24714279174805, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -43.69892120361328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.262214422225952, + "rewards_train/margins": -1.8673222959041595, + "rewards_train/rejected": -0.3948921263217926, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -47.70311737060547, + "logps_train/ref_chosen": -29.875, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -20.454635620117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7828117609024048, + "rewards_train/margins": -1.031098186969757, + "rewards_train/rejected": -0.7517135739326477, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -47.90388488769531, + "logps_train/ref_chosen": -30.375, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -37.60710144042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.752888560295105, + "rewards_train/margins": 0.19532155990600586, + "rewards_train/rejected": -1.9482101202011108, + "step": 1757 + }, + { + "epoch": 0.49, + "logps_train/chosen": -24.868595123291016, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -64.32646942138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0618594884872437, + "rewards_train/margins": 3.38328754901886, + "rewards_train/rejected": -4.4451470375061035, + "step": 1757 + }, + { + "epoch": 0.49, + "learning_rate": 4.442235625839897e-07, + "loss": 0.4694, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -112.53562927246094, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -178.23211669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09643707424402237, + "rewards_train/margins": 5.019648648798466, + "rewards_train/rejected": -4.923211574554443, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -40.305362701416016, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -13.3125, + "logps_train/rejected": -23.32306480407715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1194637343287468, + "rewards_train/margins": 1.1205202862620354, + "rewards_train/rejected": -1.0010565519332886, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -9.253304481506348, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -41.77409744262695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39095544815063477, + "rewards_train/margins": 2.6052043437957764, + "rewards_train/rejected": -2.996159791946411, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -14.019805908203125, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -31.797115325927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22698059678077698, + "rewards_train/margins": 1.1152310073375702, + "rewards_train/rejected": -1.3422116041183472, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -99.42051696777344, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -124.6219253540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.192051649093628, + "rewards_train/margins": 1.1701409816741943, + "rewards_train/rejected": -3.3621926307678223, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -16.24775505065918, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -26.112977981567383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4864943027496338, + "rewards_train/margins": -0.1689465045928955, + "rewards_train/rejected": -1.3175477981567383, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -119.07463073730469, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -142.0855712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8074630498886108, + "rewards_train/margins": 1.2010940313339233, + "rewards_train/rejected": -3.008557081222534, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -57.62723922729492, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -84.13119506835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.537723958492279, + "rewards_train/margins": 1.525395691394806, + "rewards_train/rejected": -2.063119649887085, + "step": 1758 + }, + { + "epoch": 0.49, + "logps_train/chosen": -104.57688903808594, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -200.29605102539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2923111021518707, + "rewards_train/margins": 3.7219163477420807, + "rewards_train/rejected": -3.42960524559021, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -109.80213165283203, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -185.95443725585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3302130699157715, + "rewards_train/margins": 5.015230655670166, + "rewards_train/rejected": -9.345443725585938, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -131.1421356201172, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -124.58086395263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5142135620117188, + "rewards_train/margins": 0.7938728332519531, + "rewards_train/rejected": -2.308086395263672, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -30.16204071044922, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -81.08158874511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9224541187286377, + "rewards_train/margins": 1.2107048034667969, + "rewards_train/rejected": -3.1331589221954346, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -15.018649101257324, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -42.62709426879883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2909274101257324, + "rewards_train/margins": 1.6530320644378662, + "rewards_train/rejected": -2.9439594745635986, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -66.48863220214844, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -67.26519775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4988632202148438, + "rewards_train/margins": 0.07765662670135498, + "rewards_train/rejected": -1.5765198469161987, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -22.657474517822266, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -18.88320541381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8094974756240845, + "rewards_train/margins": 0.33819806575775146, + "rewards_train/rejected": -1.147695541381836, + "step": 1759 + }, + { + "epoch": 0.49, + "logps_train/chosen": -40.86967468261719, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -37.554256439208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6619675159454346, + "rewards_train/margins": 0.6372082233428955, + "rewards_train/rejected": -2.29917573928833, + "step": 1759 + }, + { + "epoch": 0.49, + "learning_rate": 4.42026178824784e-07, + "loss": 0.2881, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -18.790475845336914, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -21.128387451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7290475964546204, + "rewards_train/margins": 0.843166172504425, + "rewards_train/rejected": -1.5722137689590454, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -125.25540924072266, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -181.35385131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.675540924072266, + "rewards_train/margins": 1.2098441123962402, + "rewards_train/rejected": -5.885385036468506, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -150.73280334472656, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -159.30419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6732803583145142, + "rewards_train/margins": 4.757139563560486, + "rewards_train/rejected": -6.430419921875, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -192.29449462890625, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -261.0997009277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.629449367523193, + "rewards_train/margins": 5.180520534515381, + "rewards_train/rejected": -9.809969902038574, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -137.48597717285156, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -147.04714965820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.148597717285156, + "rewards_train/margins": 1.2061171531677246, + "rewards_train/rejected": -5.354714870452881, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -168.7002716064453, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -178.42715454101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.220026969909668, + "rewards_train/margins": -0.2273116111755371, + "rewards_train/rejected": -7.992715358734131, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -0.4123970866203308, + "logps_train/ref_chosen": -0.60546875, + "logps_train/ref_rejected": -0.60546875, + "logps_train/rejected": -0.3679226040840149, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.01930716633796692, + "rewards_train/margins": -0.0044474489986896515, + "rewards_train/rejected": 0.02375461533665657, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -134.76040649414062, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -179.74134826660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4260406494140625, + "rewards_train/margins": 1.5480942726135254, + "rewards_train/rejected": -3.974134922027588, + "step": 1760 + }, + { + "epoch": 0.49, + "logps_train/chosen": -73.92144775390625, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -141.61199951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3078552186489105, + "rewards_train/margins": 4.3190551698207855, + "rewards_train/rejected": -4.011199951171875, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -1.3552989959716797, + "logps_train/ref_chosen": -0.75390625, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -6.172411918640137, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06013927608728409, + "rewards_train/margins": 0.3180394098162651, + "rewards_train/rejected": -0.3781786859035492, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -5.288974285125732, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -22.51617431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011727571487426758, + "rewards_train/margins": 0.45084500312805176, + "rewards_train/rejected": -0.439117431640625, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -26.419532775878906, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -21.86516571044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4107033014297485, + "rewards_train/margins": -0.16793668270111084, + "rewards_train/rejected": -1.2427666187286377, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -39.96002197265625, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -14.703779220581055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17100219428539276, + "rewards_train/margins": 0.5025007277727127, + "rewards_train/rejected": -0.6735029220581055, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -17.339879989624023, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -21.295581817626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9558629989624023, + "rewards_train/margins": 0.05494523048400879, + "rewards_train/rejected": -1.0108082294464111, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -27.592315673828125, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -32.568931579589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2154815196990967, + "rewards_train/margins": 0.047661781311035156, + "rewards_train/rejected": -2.263143301010132, + "step": 1761 + }, + { + "epoch": 0.49, + "logps_train/chosen": -171.8841552734375, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -197.83511352539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.68841552734375, + "rewards_train/margins": 2.695096015930176, + "rewards_train/rejected": -5.383511543273926, + "step": 1761 + }, + { + "epoch": 0.49, + "learning_rate": 4.398327002907395e-07, + "loss": 0.3942, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -205.71270751953125, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -285.1363525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.97127103805542, + "rewards_train/margins": 1.8423643112182617, + "rewards_train/rejected": -7.813635349273682, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -85.22530364990234, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -187.72604370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4225304126739502, + "rewards_train/margins": 2.2500739097595215, + "rewards_train/rejected": -3.6726043224334717, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -92.521728515625, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -92.180419921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.19782714545726776, + "rewards_train/margins": -0.034130871295928955, + "rewards_train/rejected": 0.23195801675319672, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -214.01219177246094, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -196.96975708007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.101219177246094, + "rewards_train/margins": -0.40424346923828125, + "rewards_train/rejected": -4.6969757080078125, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -156.54202270507812, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -305.6893615722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.204202175140381, + "rewards_train/margins": 8.164734363555908, + "rewards_train/rejected": -12.368936538696289, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -0.9180480241775513, + "logps_train/ref_chosen": -0.470703125, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -10.450789451599121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04473448917269707, + "rewards_train/margins": 0.584719467908144, + "rewards_train/rejected": -0.6294539570808411, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -118.10162353515625, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -195.115966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.960162401199341, + "rewards_train/margins": 7.051434278488159, + "rewards_train/rejected": -10.0115966796875, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -195.90806579589844, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -230.82388305664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.290806770324707, + "rewards_train/margins": 4.091581344604492, + "rewards_train/rejected": -8.3823881149292, + "step": 1762 + }, + { + "epoch": 0.49, + "logps_train/chosen": -90.0411376953125, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -133.00619506835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1041138172149658, + "rewards_train/margins": 3.0465056896209717, + "rewards_train/rejected": -4.1506195068359375, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -8.183732032775879, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -19.331680297851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006626796908676624, + "rewards_train/margins": 1.5663573266938329, + "rewards_train/rejected": -1.5597305297851562, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -159.8145751953125, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -185.73800659179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.881457805633545, + "rewards_train/margins": 2.192343235015869, + "rewards_train/rejected": -8.073801040649414, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -49.82699966430664, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -45.75, + "logps_train/rejected": -70.73944854736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0327000617980957, + "rewards_train/margins": 0.4662449359893799, + "rewards_train/rejected": -2.4989449977874756, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -91.39359283447266, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -89.86670684814453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5893592834472656, + "rewards_train/margins": -0.402688592672348, + "rewards_train/rejected": -0.1866706907749176, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -189.75888061523438, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -227.2886199951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.775888204574585, + "rewards_train/margins": 1.4529740810394287, + "rewards_train/rejected": -4.228862285614014, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -61.25596237182617, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -45.36150360107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9005963802337646, + "rewards_train/margins": 0.4855539798736572, + "rewards_train/rejected": -3.386150360107422, + "step": 1763 + }, + { + "epoch": 0.49, + "logps_train/chosen": -29.892425537109375, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -42.37630081176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1767425537109375, + "rewards_train/margins": 2.029637575149536, + "rewards_train/rejected": -3.2063801288604736, + "step": 1763 + }, + { + "epoch": 0.49, + "learning_rate": 4.3764314233388054e-07, + "loss": 0.3057, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -3.4784185886383057, + "logps_train/ref_chosen": -1.25, + "logps_train/ref_rejected": -1.25, + "logps_train/rejected": -3.3829824924468994, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22284185886383057, + "rewards_train/margins": -0.009543612599372864, + "rewards_train/rejected": -0.2132982462644577, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -181.54763793945312, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -161.8396759033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9547637701034546, + "rewards_train/margins": 1.2292038202285767, + "rewards_train/rejected": -3.1839675903320312, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -55.904136657714844, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -20.205856323242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3529136180877686, + "rewards_train/margins": -2.33857798576355, + "rewards_train/rejected": -1.0143356323242188, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -122.43212890625, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -128.58726501464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.493212938308716, + "rewards_train/margins": -0.23448634147644043, + "rewards_train/rejected": -3.2587265968322754, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -9.442129135131836, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -12.443315505981445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3973379135131836, + "rewards_train/margins": 0.07511863112449646, + "rewards_train/rejected": -0.47245654463768005, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -23.559532165527344, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -37.39570236206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.499703288078308, + "rewards_train/margins": 1.7961169481277466, + "rewards_train/rejected": -3.2958202362060547, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -20.287982940673828, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -85.98030090332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1725482940673828, + "rewards_train/margins": 3.8754820823669434, + "rewards_train/rejected": -5.048030376434326, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -130.10401916503906, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -147.75051879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.760401964187622, + "rewards_train/margins": 1.2646501064300537, + "rewards_train/rejected": -5.025052070617676, + "step": 1764 + }, + { + "epoch": 0.49, + "logps_train/chosen": -28.48724365234375, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -95.353759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3112243711948395, + "rewards_train/margins": 6.6991516053676605, + "rewards_train/rejected": -7.0103759765625, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -103.07535552978516, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -183.9544219970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1075356006622314, + "rewards_train/margins": 4.4879066944122314, + "rewards_train/rejected": -6.595442295074463, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -142.26658630371094, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -141.01959228515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.276658535003662, + "rewards_train/margins": -0.5746991634368896, + "rewards_train/rejected": -3.7019593715667725, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -172.03115844726562, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -166.71067810058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.3031158447265625, + "rewards_train/margins": 2.717951774597168, + "rewards_train/rejected": -9.02106761932373, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -245.70941162109375, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -200.6441650390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.570940971374512, + "rewards_train/margins": -2.3065242767333984, + "rewards_train/rejected": -6.264416694641113, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -102.18497467041016, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -215.678955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3184974789619446, + "rewards_train/margins": 0.9493981003761292, + "rewards_train/rejected": -1.2678955793380737, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -123.74446105957031, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -42.39421844482422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.2244462966918945, + "rewards_train/margins": -1.2850244045257568, + "rewards_train/rejected": -2.9394218921661377, + "step": 1765 + }, + { + "epoch": 0.49, + "logps_train/chosen": -24.385482788085938, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -25.37574577331543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9166733026504517, + "rewards_train/margins": 0.27871382236480713, + "rewards_train/rejected": -2.195387125015259, + "step": 1765 + }, + { + "epoch": 0.49, + "learning_rate": 4.35457520278792e-07, + "loss": 0.7, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -72.99668884277344, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -77.91329193115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20033112168312073, + "rewards_train/margins": 1.791660338640213, + "rewards_train/rejected": -1.5913292169570923, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -1.3806458711624146, + "logps_train/ref_chosen": -1.046875, + "logps_train/ref_rejected": -1.046875, + "logps_train/rejected": -1.3916430473327637, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.033377088606357574, + "rewards_train/margins": 0.0010997168719768524, + "rewards_train/rejected": -0.03447680547833443, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -118.39369201660156, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -212.56866455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4393692016601562, + "rewards_train/margins": 4.317497253417969, + "rewards_train/rejected": -5.756866455078125, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -20.353652954101562, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -1.875, + "logps_train/rejected": -2.319460868835449, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.8021346926689148, + "rewards_train/margins": 0.8465807810425758, + "rewards_train/rejected": -0.04444608837366104, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -4.792877197265625, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -23.615875244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007412720005959272, + "rewards_train/margins": 1.5885498044081032, + "rewards_train/rejected": -1.5959625244140625, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -148.88815307617188, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -172.82713317871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9888153076171875, + "rewards_train/margins": 2.3938980102539062, + "rewards_train/rejected": -4.382713317871094, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -91.69110107421875, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -91.33181762695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5691101551055908, + "rewards_train/margins": 0.26407158374786377, + "rewards_train/rejected": -1.8331817388534546, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -10.25192642211914, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -19.29279899597168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2001926451921463, + "rewards_train/margins": 1.1884623020887375, + "rewards_train/rejected": -1.3886549472808838, + "step": 1766 + }, + { + "epoch": 0.49, + "logps_train/chosen": -21.664804458618164, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -30.130962371826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2539805173873901, + "rewards_train/margins": 0.9809907674789429, + "rewards_train/rejected": -2.234971284866333, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -32.661376953125, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -40.738197326660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.847387671470642, + "rewards_train/margins": 0.6701821088790894, + "rewards_train/rejected": -2.5175697803497314, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -15.954353332519531, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -15.881694793701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5829353332519531, + "rewards_train/margins": -0.007265865802764893, + "rewards_train/rejected": -0.5756694674491882, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -110.99076843261719, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -148.5789031982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6990768909454346, + "rewards_train/margins": 1.1588134765625, + "rewards_train/rejected": -2.8578903675079346, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -60.26156234741211, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -103.35669708251953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.401156187057495, + "rewards_train/margins": -0.46548640727996826, + "rewards_train/rejected": -1.9356697797775269, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -190.84332275390625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -225.9641571044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.184332370758057, + "rewards_train/margins": 3.0120835304260254, + "rewards_train/rejected": -9.196415901184082, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -103.08264923095703, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -96.77301788330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1582649946212769, + "rewards_train/margins": -0.7809632122516632, + "rewards_train/rejected": -0.37730178236961365, + "step": 1767 + }, + { + "epoch": 0.49, + "logps_train/chosen": -65.2500228881836, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -44.74478530883789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.125002384185791, + "rewards_train/margins": -0.24427390098571777, + "rewards_train/rejected": -2.8807284832000732, + "step": 1767 + }, + { + "epoch": 0.49, + "learning_rate": 4.332758494225105e-07, + "loss": 0.4383, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -8.88965129852295, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -0.71875, + "logps_train/rejected": -3.5175037384033203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5795901417732239, + "rewards_train/margins": -0.2997147738933563, + "rewards_train/rejected": -0.27987536787986755, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -65.86123657226562, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -51.20392990112305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8361237049102783, + "rewards_train/margins": -0.39073073863983154, + "rewards_train/rejected": -1.4453929662704468, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -50.0137939453125, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -67.37618255615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.651379346847534, + "rewards_train/margins": 0.8362388610839844, + "rewards_train/rejected": -3.4876182079315186, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -58.228736877441406, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -31.632965087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.272873640060425, + "rewards_train/margins": 0.47167301177978516, + "rewards_train/rejected": -2.74454665184021, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -13.44709587097168, + "logps_train/ref_chosen": -1.4296875, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -27.924585342407227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2017408609390259, + "rewards_train/margins": -0.13428235054016113, + "rewards_train/rejected": -1.0674585103988647, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -3.6963376998901367, + "logps_train/ref_chosen": -3.453125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -6.876713752746582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02432127110660076, + "rewards_train/margins": 0.23522509820759296, + "rewards_train/rejected": -0.2595463693141937, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -5.313623905181885, + "logps_train/ref_chosen": -0.8359375, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -35.899879455566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4477686583995819, + "rewards_train/margins": 1.4047192633152008, + "rewards_train/rejected": -1.8524879217147827, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -117.83212280273438, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -209.3618621826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.033212423324585, + "rewards_train/margins": 5.552973985671997, + "rewards_train/rejected": -8.586186408996582, + "step": 1768 + }, + { + "epoch": 0.49, + "logps_train/chosen": -162.53604125976562, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -183.44503784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.703604221343994, + "rewards_train/margins": 4.040899753570557, + "rewards_train/rejected": -7.744503974914551, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -110.46051788330078, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -78.73402404785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1460517942905426, + "rewards_train/margins": 3.352350562810898, + "rewards_train/rejected": -3.4984023571014404, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -97.06845092773438, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -97.76336669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4068450927734375, + "rewards_train/margins": 1.719491720199585, + "rewards_train/rejected": -2.1263368129730225, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -5.321918964385986, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -0.59765625, + "logps_train/rejected": -0.6401224732398987, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1337544023990631, + "rewards_train/margins": -0.12950778007507324, + "rewards_train/rejected": -0.004246622323989868, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -52.228065490722656, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -109.6727066040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9978065490722656, + "rewards_train/margins": 3.74446439743042, + "rewards_train/rejected": -4.7422709465026855, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -149.47451782226562, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -160.56976318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7474517822265625, + "rewards_train/margins": 2.1095244884490967, + "rewards_train/rejected": -2.856976270675659, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -44.83301544189453, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -1.625, + "logps_train/rejected": -15.852404594421387, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.252051591873169, + "rewards_train/margins": -1.8293111324310303, + "rewards_train/rejected": -1.4227404594421387, + "step": 1769 + }, + { + "epoch": 0.49, + "logps_train/chosen": -108.07593536376953, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -124.404052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4575936794281006, + "rewards_train/margins": 0.48281168937683105, + "rewards_train/rejected": -2.9404053688049316, + "step": 1769 + }, + { + "epoch": 0.49, + "learning_rate": 4.310981450344189e-07, + "loss": 0.4844, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -113.64024353027344, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -189.91860961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1640243530273438, + "rewards_train/margins": 2.4278366565704346, + "rewards_train/rejected": -3.5918610095977783, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -8.10721206665039, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -14.397669792175293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.498221218585968, + "rewards_train/margins": 0.6977958083152771, + "rewards_train/rejected": -1.1960170269012451, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -73.45830535888672, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -125.74758911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3041694760322571, + "rewards_train/margins": 4.278928339481354, + "rewards_train/rejected": -3.9747588634490967, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -155.44577026367188, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -173.79978942871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.944577217102051, + "rewards_train/margins": 0.9354019165039062, + "rewards_train/rejected": -5.879979133605957, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -156.24566650390625, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -149.6864013671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.174566745758057, + "rewards_train/margins": -0.005926609039306641, + "rewards_train/rejected": -5.16864013671875, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -192.96304321289062, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -215.0, + "logps_train/rejected": -351.37249755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1963043212890625, + "rewards_train/margins": 10.440945625305176, + "rewards_train/rejected": -13.637249946594238, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -138.01983642578125, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -150.54685974121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.951983690261841, + "rewards_train/margins": 0.3027024269104004, + "rewards_train/rejected": -3.254686117172241, + "step": 1770 + }, + { + "epoch": 0.49, + "logps_train/chosen": -76.789306640625, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -85.05967712402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1789307594299316, + "rewards_train/margins": 1.852036952972412, + "rewards_train/rejected": -5.030967712402344, + "step": 1770 + }, + { + "epoch": 0.5, + "logps_train/chosen": -14.823843955993652, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -28.528297424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22613440454006195, + "rewards_train/margins": 0.41419537365436554, + "rewards_train/rejected": -0.6403297781944275, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -140.25515747070312, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -129.16619873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.42551589012146, + "rewards_train/margins": 0.34110403060913086, + "rewards_train/rejected": -3.766619920730591, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -214.4027099609375, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -158.52415466308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.34027099609375, + "rewards_train/margins": 0.4621443748474121, + "rewards_train/rejected": -6.802415370941162, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -139.5931396484375, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -257.60980224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.059314250946045, + "rewards_train/margins": 6.401666164398193, + "rewards_train/rejected": -10.460980415344238, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -0.9973515868186951, + "logps_train/ref_chosen": -0.337890625, + "logps_train/ref_rejected": -3.375, + "logps_train/rejected": -22.273014068603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06594609469175339, + "rewards_train/margins": 1.8238552883267403, + "rewards_train/rejected": -1.8898013830184937, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -15.72734260559082, + "logps_train/ref_chosen": -3.375, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -19.00066375732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.235234260559082, + "rewards_train/margins": 0.17733216285705566, + "rewards_train/rejected": -1.4125664234161377, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -95.3792953491211, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -229.940185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.837929606437683, + "rewards_train/margins": 4.656088948249817, + "rewards_train/rejected": -6.4940185546875, + "step": 1771 + }, + { + "epoch": 0.5, + "logps_train/chosen": -40.64885711669922, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -70.662353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1336357593536377, + "rewards_train/margins": 1.2325994968414307, + "rewards_train/rejected": -4.366235256195068, + "step": 1771 + }, + { + "epoch": 0.5, + "learning_rate": 4.2892442235613855e-07, + "loss": 0.2992, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -190.38037109375, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -218.6552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.738037109375, + "rewards_train/margins": 3.2274904251098633, + "rewards_train/rejected": -5.965527534484863, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -91.51556396484375, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -52.75, + "logps_train/rejected": -83.26946258544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6515564322471619, + "rewards_train/margins": 2.4003899693489075, + "rewards_train/rejected": -3.0519464015960693, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -45.85340881347656, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -24.825191497802734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5978410243988037, + "rewards_train/margins": -0.3340718746185303, + "rewards_train/rejected": -2.2637691497802734, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -158.40261840820312, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -158.29627990722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.240262031555176, + "rewards_train/margins": 1.3893661499023438, + "rewards_train/rejected": -6.6296281814575195, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -85.23754119873047, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -112.08619689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2737541198730469, + "rewards_train/margins": 5.059865474700928, + "rewards_train/rejected": -6.333619594573975, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -284.96490478515625, + "logps_train/ref_chosen": -203.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -261.8883361816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.196490287780762, + "rewards_train/margins": 5.342343330383301, + "rewards_train/rejected": -13.538833618164062, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -124.14177703857422, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -171.08551025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2141778469085693, + "rewards_train/margins": 0.5943732261657715, + "rewards_train/rejected": -2.808551073074341, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -47.466758728027344, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -92.78264617919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2091758251190186, + "rewards_train/margins": 2.3440887928009033, + "rewards_train/rejected": -4.553264617919922, + "step": 1772 + }, + { + "epoch": 0.5, + "logps_train/chosen": -114.05319213867188, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -154.03575134277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6053192019462585, + "rewards_train/margins": 4.098255932331085, + "rewards_train/rejected": -4.703575134277344, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -163.3978271484375, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -208.58346557617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2397828102111816, + "rewards_train/margins": 3.818563938140869, + "rewards_train/rejected": -7.058346748352051, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -18.47840118408203, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -24.99532699584961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7478401064872742, + "rewards_train/margins": -0.2733074128627777, + "rewards_train/rejected": -0.47453269362449646, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -20.988706588745117, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -10.656656265258789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3894957304000854, + "rewards_train/margins": -0.5394551157951355, + "rewards_train/rejected": -0.85004061460495, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -70.66847229003906, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -142.90467834472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18315277993679047, + "rewards_train/margins": 3.3236206620931625, + "rewards_train/rejected": -3.140467882156372, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -46.648094177246094, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -40.8603630065918, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9804344177246094, + "rewards_train/margins": -0.7256481647491455, + "rewards_train/rejected": -3.254786252975464, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -15.354036331176758, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -2.21875, + "logps_train/rejected": -29.902442932128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2135286331176758, + "rewards_train/margins": 1.5548408031463623, + "rewards_train/rejected": -2.768369436264038, + "step": 1773 + }, + { + "epoch": 0.5, + "logps_train/chosen": -44.63168716430664, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -87.63381958007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6881687045097351, + "rewards_train/margins": -0.22478672862052917, + "rewards_train/rejected": -0.46338197588920593, + "step": 1773 + }, + { + "epoch": 0.5, + "learning_rate": 4.267546966014246e-07, + "loss": 0.3623, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -169.831298828125, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -123.08212280273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5831298828125, + "rewards_train/margins": -0.37491750717163086, + "rewards_train/rejected": -2.208212375640869, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -36.3306999206543, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -45.05047607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2580699920654297, + "rewards_train/margins": 0.5938527584075928, + "rewards_train/rejected": -3.8519227504730225, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.17151927947998, + "logps_train/ref_chosen": -0.1142578125, + "logps_train/ref_rejected": -0.1142578125, + "logps_train/rejected": -7.929030418395996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.805726170539856, + "rewards_train/margins": -0.024248898029327393, + "rewards_train/rejected": -0.7814772725105286, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -58.36167907714844, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -138.14991760253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08883209526538849, + "rewards_train/margins": 1.2538238316774368, + "rewards_train/rejected": -1.1649917364120483, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -83.91597747802734, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -139.41824340820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44159775972366333, + "rewards_train/margins": 5.1502267718315125, + "rewards_train/rejected": -5.591824531555176, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -23.963350296020508, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -27.932090759277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6838350296020508, + "rewards_train/margins": 1.0406240224838257, + "rewards_train/rejected": -1.7244590520858765, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -39.393646240234375, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -67.51451110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2206146717071533, + "rewards_train/margins": 2.9120867252349854, + "rewards_train/rejected": -6.132701396942139, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -7.264915466308594, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -16.896940231323242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5155540704727173, + "rewards_train/margins": -0.2133600413799286, + "rewards_train/rejected": -0.3021940290927887, + "step": 1774 + }, + { + "epoch": 0.5, + "logps_train/chosen": -14.439167976379395, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -9.662784576416016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7157918214797974, + "rewards_train/margins": -0.3057633638381958, + "rewards_train/rejected": -0.41002845764160156, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -16.9999942779541, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -26.184770584106445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2124994993209839, + "rewards_train/margins": -0.06902241706848145, + "rewards_train/rejected": -1.1434770822525024, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -150.92474365234375, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -167.23092651367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2924745082855225, + "rewards_train/margins": 3.4306180477142334, + "rewards_train/rejected": -6.723092555999756, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -101.80050659179688, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -91.86363983154297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7300506830215454, + "rewards_train/margins": -0.7936867028474808, + "rewards_train/rejected": 0.06363601982593536, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.940828323364258, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -27.717918395996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1378328800201416, + "rewards_train/margins": 1.0745840072631836, + "rewards_train/rejected": -2.212416887283325, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -14.42119026184082, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -13.497081756591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9858690500259399, + "rewards_train/margins": -0.44866085052490234, + "rewards_train/rejected": -0.5372081995010376, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -42.07178497314453, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -101.67953491210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2678215205669403, + "rewards_train/margins": 2.5857751071453094, + "rewards_train/rejected": -2.317953586578369, + "step": 1775 + }, + { + "epoch": 0.5, + "logps_train/chosen": -35.556644439697266, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -151.9869842529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1931644678115845, + "rewards_train/margins": 5.455533862113953, + "rewards_train/rejected": -6.648698329925537, + "step": 1775 + }, + { + "epoch": 0.5, + "learning_rate": 4.245889829560558e-07, + "loss": 0.472, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -40.31959533691406, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -48.32095718383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6569595336914062, + "rewards_train/margins": 1.3501362800598145, + "rewards_train/rejected": -4.007095813751221, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -28.331180572509766, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -135.06982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1956180334091187, + "rewards_train/margins": 1.2113643884658813, + "rewards_train/rejected": -2.406982421875, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -0.313866525888443, + "logps_train/ref_chosen": -0.5390625, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -8.47940444946289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02251959778368473, + "rewards_train/margins": 0.42046005465090275, + "rewards_train/rejected": -0.397940456867218, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -6.943716049194336, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -8.466865539550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07125339657068253, + "rewards_train/margins": 0.17731495201587677, + "rewards_train/rejected": -0.10606155544519424, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -146.495361328125, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -161.85482788085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.599536418914795, + "rewards_train/margins": 0.8859462738037109, + "rewards_train/rejected": -6.485482692718506, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -214.365234375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -206.06832885742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.4365234375, + "rewards_train/margins": 0.12030982971191406, + "rewards_train/rejected": -8.556833267211914, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -91.33634948730469, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -82.2828598022461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9336349368095398, + "rewards_train/margins": 2.3946509957313538, + "rewards_train/rejected": -3.3282859325408936, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -190.41867065429688, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -188.8475799560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.791867256164551, + "rewards_train/margins": 0.9428906440734863, + "rewards_train/rejected": -7.734757900238037, + "step": 1776 + }, + { + "epoch": 0.5, + "logps_train/chosen": -54.26579284667969, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -47.47855758666992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1515793800354004, + "rewards_train/margins": -0.21622347831726074, + "rewards_train/rejected": -2.9353559017181396, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -30.762779235839844, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -3.34375, + "logps_train/rejected": -18.144994735717773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7262779474258423, + "rewards_train/margins": -0.24615347385406494, + "rewards_train/rejected": -1.4801244735717773, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -89.72457122802734, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -161.131103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07245712727308273, + "rewards_train/margins": 5.440653510391712, + "rewards_train/rejected": -5.513110637664795, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -203.73956298828125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -180.52110290527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.073956489562988, + "rewards_train/margins": 1.5781540870666504, + "rewards_train/rejected": -7.652110576629639, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -126.85458374023438, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -162.864990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21454162895679474, + "rewards_train/margins": 2.9010407477617264, + "rewards_train/rejected": -2.6864991188049316, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -74.731689453125, + "logps_train/ref_chosen": -58.0, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -62.68145751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6731690168380737, + "rewards_train/margins": 2.6449767351150513, + "rewards_train/rejected": -4.318145751953125, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -14.221734046936035, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -24.4444637298584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5409234166145325, + "rewards_train/margins": 0.5597730278968811, + "rewards_train/rejected": -1.1006964445114136, + "step": 1777 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.62696361541748, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -10.773592948913574, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41894635558128357, + "rewards_train/margins": 0.2724754512310028, + "rewards_train/rejected": -0.6914218068122864, + "step": 1777 + }, + { + "epoch": 0.5, + "learning_rate": 4.224272965777326e-07, + "loss": 0.3727, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -97.95401763916016, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -79.77986145019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7704017162323, + "rewards_train/margins": 0.9075844287872314, + "rewards_train/rejected": -4.677986145019531, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -150.06344604492188, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -150.5710906982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.356344699859619, + "rewards_train/margins": 1.2507643699645996, + "rewards_train/rejected": -4.607109069824219, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -23.448688507080078, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -42.534507751464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7667438983917236, + "rewards_train/margins": 1.2179570198059082, + "rewards_train/rejected": -2.984700918197632, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -161.40884399414062, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -257.43048095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.440884590148926, + "rewards_train/margins": 6.502163887023926, + "rewards_train/rejected": -10.943048477172852, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -77.26298522949219, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -221.51113891601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8762985467910767, + "rewards_train/margins": 8.124815344810486, + "rewards_train/rejected": -10.001113891601562, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -72.7662353515625, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -69.08921813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.85162353515625, + "rewards_train/margins": 2.257298469543457, + "rewards_train/rejected": -5.108922004699707, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -26.866840362548828, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -39.194175720214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3366841077804565, + "rewards_train/margins": 1.9264835119247437, + "rewards_train/rejected": -3.2631676197052, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -34.71307373046875, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -39.740814208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3025574684143066, + "rewards_train/margins": 0.7715239524841309, + "rewards_train/rejected": -3.0740814208984375, + "step": 1778 + }, + { + "epoch": 0.5, + "logps_train/chosen": -167.12925720214844, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -87.25152587890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.962925910949707, + "rewards_train/margins": -0.587773323059082, + "rewards_train/rejected": -5.375152587890625, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -4.206799030303955, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -36.44255828857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20661740005016327, + "rewards_train/margins": 2.647013381123543, + "rewards_train/rejected": -2.853630781173706, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -153.8968963623047, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -153.11904907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.539689779281616, + "rewards_train/margins": 1.672215223312378, + "rewards_train/rejected": -5.211905002593994, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -97.62000274658203, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -127.99562072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9620002508163452, + "rewards_train/margins": 1.1875618696212769, + "rewards_train/rejected": -3.149562120437622, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -9.665590286254883, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -29.44565200805664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4884340465068817, + "rewards_train/margins": 1.8030062019824982, + "rewards_train/rejected": -2.29144024848938, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -139.24716186523438, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -181.79901123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.824716329574585, + "rewards_train/margins": 2.9051849842071533, + "rewards_train/rejected": -6.729901313781738, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -139.83055114746094, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -196.8050537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8830551505088806, + "rewards_train/margins": 8.69745022058487, + "rewards_train/rejected": -9.58050537109375, + "step": 1779 + }, + { + "epoch": 0.5, + "logps_train/chosen": -167.5604705810547, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -150.24517822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.056047201156616, + "rewards_train/margins": 0.01847076416015625, + "rewards_train/rejected": -3.0745179653167725, + "step": 1779 + }, + { + "epoch": 0.5, + "learning_rate": 4.202696525959666e-07, + "loss": 0.2433, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.989350318908691, + "logps_train/ref_chosen": -0.85546875, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -46.55763244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8133881688117981, + "rewards_train/margins": 1.7673751711845398, + "rewards_train/rejected": -2.580763339996338, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -45.20397186279297, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -50.7598991394043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.882897138595581, + "rewards_train/margins": 0.024342775344848633, + "rewards_train/rejected": -3.9072399139404297, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -134.03402709960938, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -198.789306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0034027099609375, + "rewards_train/margins": 6.575528144836426, + "rewards_train/rejected": -10.578930854797363, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.831186294555664, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -24.634288787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5643686652183533, + "rewards_train/margins": 0.26156020164489746, + "rewards_train/rejected": -0.8259288668632507, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -135.58497619628906, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -131.9950714111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.508497714996338, + "rewards_train/margins": 2.8410096168518066, + "rewards_train/rejected": -6.3495073318481445, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -23.236797332763672, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -20.16146469116211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9533672332763672, + "rewards_train/margins": -0.18097078800201416, + "rewards_train/rejected": -1.772396445274353, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -15.05108642578125, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -15.84510612487793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6988586783409119, + "rewards_train/margins": 0.3606519103050232, + "rewards_train/rejected": -1.059510588645935, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -104.40548706054688, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -192.87127685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4905487298965454, + "rewards_train/margins": 4.7965792417526245, + "rewards_train/rejected": -6.28712797164917, + "step": 1780 + }, + { + "epoch": 0.5, + "logps_train/chosen": -53.46284484863281, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -64.14688110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.365034580230713, + "rewards_train/margins": 0.899653434753418, + "rewards_train/rejected": -5.264688014984131, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -265.6529846191406, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -268.0799560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.165298461914062, + "rewards_train/margins": 0.24269771575927734, + "rewards_train/rejected": -8.40799617767334, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -40.48316955566406, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -42.81525802612305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2045669555664062, + "rewards_train/margins": 0.7988338470458984, + "rewards_train/rejected": -4.003400802612305, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -67.86759948730469, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -48.25, + "logps_train/rejected": -74.79026794433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5617599487304688, + "rewards_train/margins": 1.0922667980194092, + "rewards_train/rejected": -2.654026746749878, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -152.1081085205078, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -211.27154541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6108109951019287, + "rewards_train/margins": 5.61634373664856, + "rewards_train/rejected": -8.227154731750488, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -122.50541687011719, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -300.7501220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4505417346954346, + "rewards_train/margins": 9.324470281600952, + "rewards_train/rejected": -11.775012016296387, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -117.07664489746094, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -182.15921020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.507664442062378, + "rewards_train/margins": 5.808256387710571, + "rewards_train/rejected": -8.31592082977295, + "step": 1781 + }, + { + "epoch": 0.5, + "logps_train/chosen": -36.56646728515625, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -3.578125, + "logps_train/rejected": -23.842931747436523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.781646728515625, + "rewards_train/margins": 0.24483394622802734, + "rewards_train/rejected": -2.0264806747436523, + "step": 1781 + }, + { + "epoch": 0.5, + "learning_rate": 4.1811606611197904e-07, + "loss": 0.3099, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -66.15652465820312, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -39.99708557128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2156524658203125, + "rewards_train/margins": 1.1715561151504517, + "rewards_train/rejected": -1.3872085809707642, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -12.74888801574707, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -26.177616119384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.837388813495636, + "rewards_train/margins": 0.6803727746009827, + "rewards_train/rejected": -1.5177615880966187, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -12.001574516296387, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -26.616125106811523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06234255060553551, + "rewards_train/margins": 2.430205013602972, + "rewards_train/rejected": -2.3678624629974365, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -137.6194610595703, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -152.8976593017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.811946153640747, + "rewards_train/margins": 4.827820062637329, + "rewards_train/rejected": -7.639766216278076, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -241.716796875, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -250.0, + "logps_train/rejected": -305.9623107910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.571679592132568, + "rewards_train/margins": -0.9754486083984375, + "rewards_train/rejected": -5.596230983734131, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -4.0638041496276855, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -9.915881156921387, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1063804179430008, + "rewards_train/margins": 0.2352076917886734, + "rewards_train/rejected": -0.3415881097316742, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -147.3784942626953, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -214.40623474121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.837849497795105, + "rewards_train/margins": 5.30277407169342, + "rewards_train/rejected": -7.140623569488525, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -170.76077270507812, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -226.84701538085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.176077365875244, + "rewards_train/margins": 3.6086244583129883, + "rewards_train/rejected": -6.784701824188232, + "step": 1782 + }, + { + "epoch": 0.5, + "logps_train/chosen": -185.57080078125, + "logps_train/ref_chosen": -189.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -110.19650268554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3429199159145355, + "rewards_train/margins": 1.812570184469223, + "rewards_train/rejected": -1.4696502685546875, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -168.1827392578125, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -159.24261474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1182739734649658, + "rewards_train/margins": 1.3059875965118408, + "rewards_train/rejected": -2.4242615699768066, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -84.10231018066406, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -227.656005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.06023108959198, + "rewards_train/margins": 9.855369687080383, + "rewards_train/rejected": -10.915600776672363, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -24.84900665283203, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -43.870277404785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2786506414413452, + "rewards_train/margins": 0.7833770513534546, + "rewards_train/rejected": -2.0620276927948, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -52.33525848388672, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -59.95915603637695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7960259914398193, + "rewards_train/margins": 0.937389612197876, + "rewards_train/rejected": -4.733415603637695, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -63.51874923706055, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -45.9624137878418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0768749713897705, + "rewards_train/margins": -0.493133544921875, + "rewards_train/rejected": -1.5837414264678955, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -14.118986129760742, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -28.837406158447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9900236129760742, + "rewards_train/margins": 1.4343421459197998, + "rewards_train/rejected": -2.424365758895874, + "step": 1783 + }, + { + "epoch": 0.5, + "logps_train/chosen": -137.6528778076172, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -184.1027069091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2652878761291504, + "rewards_train/margins": 5.244983196258545, + "rewards_train/rejected": -8.510271072387695, + "step": 1783 + }, + { + "epoch": 0.5, + "learning_rate": 4.1596655219859156e-07, + "loss": 0.3103, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -77.77531433105469, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -183.9798583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9775314331054688, + "rewards_train/margins": 6.3204545974731445, + "rewards_train/rejected": -7.297986030578613, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -140.6314697265625, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -238.36294555664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.963146924972534, + "rewards_train/margins": 3.0731475353240967, + "rewards_train/rejected": -7.036294460296631, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -117.42133331298828, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -196.95941162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4421334266662598, + "rewards_train/margins": 5.203807830810547, + "rewards_train/rejected": -7.645941257476807, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -9.893146514892578, + "logps_train/ref_chosen": -1.9921875, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -11.41650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7900959253311157, + "rewards_train/margins": 0.15155446529388428, + "rewards_train/rejected": -0.941650390625, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -182.1287078857422, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -205.22227478027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.762870788574219, + "rewards_train/margins": 2.559356689453125, + "rewards_train/rejected": -8.322227478027344, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -46.82091522216797, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -49.06027603149414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.307091474533081, + "rewards_train/margins": -0.5510637760162354, + "rewards_train/rejected": -2.7560276985168457, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -15.475540161132812, + "logps_train/ref_chosen": -5.3125, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -31.7233943939209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0163040161132812, + "rewards_train/margins": 1.5435354709625244, + "rewards_train/rejected": -2.5598394870758057, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -223.37313842773438, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -236.03187561035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.937314033508301, + "rewards_train/margins": -0.5341262817382812, + "rewards_train/rejected": -7.4031877517700195, + "step": 1784 + }, + { + "epoch": 0.5, + "logps_train/chosen": -6.008963584899902, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -21.021087646484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19152136147022247, + "rewards_train/margins": -0.22691259905695915, + "rewards_train/rejected": 0.03539123758673668, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.95785903930664, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -17.634796142578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0667141005396843, + "rewards_train/margins": -0.032306283712387085, + "rewards_train/rejected": 0.09902038425207138, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -18.738109588623047, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -9.971367835998535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1011890396475792, + "rewards_train/margins": 0.5233258232474327, + "rewards_train/rejected": -0.4221367835998535, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -33.85895538330078, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -0.84375, + "logps_train/rejected": -13.291319847106934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0921456813812256, + "rewards_train/margins": -0.8473886251449585, + "rewards_train/rejected": -1.244757056236267, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -136.98936462402344, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -173.17312622070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.248936414718628, + "rewards_train/margins": 5.418376207351685, + "rewards_train/rejected": -7.6673126220703125, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -122.10166931152344, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -141.18634033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5601669549942017, + "rewards_train/margins": 2.0084670782089233, + "rewards_train/rejected": -2.568634033203125, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -126.3945083618164, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -73.22639465332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8894509077072144, + "rewards_train/margins": 0.4331885576248169, + "rewards_train/rejected": -2.3226394653320312, + "step": 1785 + }, + { + "epoch": 0.5, + "logps_train/chosen": -9.790390968322754, + "logps_train/ref_chosen": -0.8359375, + "logps_train/ref_rejected": -56.0, + "logps_train/rejected": -52.21821594238281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8954453468322754, + "rewards_train/margins": -1.273623764514923, + "rewards_train/rejected": 0.3781784176826477, + "step": 1785 + }, + { + "epoch": 0.5, + "learning_rate": 4.138211259001222e-07, + "loss": 0.518, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -131.47142028808594, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -202.42208862304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9471420049667358, + "rewards_train/margins": 3.49506676197052, + "rewards_train/rejected": -5.442208766937256, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -10.190781593322754, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -21.39777946472168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3815781772136688, + "rewards_train/margins": 0.783199816942215, + "rewards_train/rejected": -1.1647779941558838, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -187.49143981933594, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -233.7581787109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.449143886566162, + "rewards_train/margins": 0.6266741752624512, + "rewards_train/rejected": -5.075818061828613, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -92.43800354003906, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -49.883262634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2938003540039062, + "rewards_train/margins": 1.6882760524749756, + "rewards_train/rejected": -3.982076406478882, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -7.549956321716309, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -18.37691307067871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3487456440925598, + "rewards_train/margins": 0.2701956629753113, + "rewards_train/rejected": -0.6189413070678711, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -146.59503173828125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -136.67095947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.759503126144409, + "rewards_train/margins": 1.157593011856079, + "rewards_train/rejected": -4.917096138000488, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -12.622367858886719, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -18.45354461669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.018486786633729935, + "rewards_train/margins": 1.3706176988780499, + "rewards_train/rejected": -1.3891044855117798, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -227.54547119140625, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -354.43682861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.604547500610352, + "rewards_train/margins": 10.139135360717773, + "rewards_train/rejected": -20.743682861328125, + "step": 1786 + }, + { + "epoch": 0.5, + "logps_train/chosen": -135.1710662841797, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -152.46347045898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6671066284179688, + "rewards_train/margins": -0.5207595825195312, + "rewards_train/rejected": -2.1463470458984375, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.695844650268555, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -8.539711952209473, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3945844769477844, + "rewards_train/margins": 0.09219923615455627, + "rewards_train/rejected": -0.4867837131023407, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -139.21240234375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -173.2833251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.271240234375, + "rewards_train/margins": 5.007092475891113, + "rewards_train/rejected": -6.278332710266113, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -95.53031921386719, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -73.49012756347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4530319273471832, + "rewards_train/margins": 2.920980781316757, + "rewards_train/rejected": -3.3740127086639404, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -155.30410766601562, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -232.38197326660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3304108381271362, + "rewards_train/margins": 2.90778648853302, + "rewards_train/rejected": -4.238197326660156, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -11.952768325805664, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -18.191452026367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8171518445014954, + "rewards_train/margins": -0.4292566478252411, + "rewards_train/rejected": -0.3878951966762543, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -12.867657661437988, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -42.796424865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7648907899856567, + "rewards_train/margins": 1.8272517919540405, + "rewards_train/rejected": -2.5921425819396973, + "step": 1787 + }, + { + "epoch": 0.5, + "logps_train/chosen": -70.54682922363281, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -124.96092224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3546829223632812, + "rewards_train/margins": 1.59140944480896, + "rewards_train/rejected": -2.946092367172241, + "step": 1787 + }, + { + "epoch": 0.5, + "learning_rate": 4.116798022322807e-07, + "loss": 0.3175, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -184.29229736328125, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -187.11068725585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.229229927062988, + "rewards_train/margins": 2.381838798522949, + "rewards_train/rejected": -6.6110687255859375, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -28.1981201171875, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -32.708221435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.232311964035034, + "rewards_train/margins": 0.6400728225708008, + "rewards_train/rejected": -2.872384786605835, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -25.335247039794922, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -43.232582092285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.130399703979492, + "rewards_train/margins": 1.4741084575653076, + "rewards_train/rejected": -3.6045081615448, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -2.4825360774993896, + "logps_train/ref_chosen": -2.671875, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -5.558579444885254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018933892250061035, + "rewards_train/margins": 0.024791836738586426, + "rewards_train/rejected": -0.005857944488525391, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -190.67581176757812, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -202.9296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.01758098602295, + "rewards_train/margins": 1.075387954711914, + "rewards_train/rejected": -11.092968940734863, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.184423446655273, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -1.3828125, + "logps_train/rejected": -35.153682708740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06844234466552734, + "rewards_train/margins": 3.3086447715759277, + "rewards_train/rejected": -3.377087116241455, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -6.578222751617432, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -46.804962158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3078222870826721, + "rewards_train/margins": 2.097674071788788, + "rewards_train/rejected": -2.40549635887146, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -33.793514251708984, + "logps_train/ref_chosen": -29.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -103.31288146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42935141921043396, + "rewards_train/margins": 2.501936823129654, + "rewards_train/rejected": -2.931288242340088, + "step": 1788 + }, + { + "epoch": 0.5, + "logps_train/chosen": -88.11015319824219, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -166.33648681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1610153913497925, + "rewards_train/margins": 5.4226332902908325, + "rewards_train/rejected": -6.583648681640625, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -10.565835952758789, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -24.98200225830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7925211191177368, + "rewards_train/margins": 1.2150541543960571, + "rewards_train/rejected": -2.007575273513794, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -60.35049057006836, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -107.0782470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.835049033164978, + "rewards_train/margins": 2.497775673866272, + "rewards_train/rejected": -4.33282470703125, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -27.84710693359375, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -36.816368103027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.997210681438446, + "rewards_train/margins": 0.7719261050224304, + "rewards_train/rejected": -1.7691367864608765, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -51.34728240966797, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -51.41837692260742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.834728240966797, + "rewards_train/margins": 0.007109403610229492, + "rewards_train/rejected": -2.8418376445770264, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -51.948856353759766, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -32.320579528808594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.332385778427124, + "rewards_train/margins": -0.483140230178833, + "rewards_train/rejected": -2.849245548248291, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -21.21477699279785, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -39.798057556152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7777277231216431, + "rewards_train/margins": 1.6833280324935913, + "rewards_train/rejected": -2.4610557556152344, + "step": 1789 + }, + { + "epoch": 0.5, + "logps_train/chosen": -168.3639373779297, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -186.98428344726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8363938331604004, + "rewards_train/margins": 4.162034511566162, + "rewards_train/rejected": -6.9984283447265625, + "step": 1789 + }, + { + "epoch": 0.5, + "learning_rate": 4.095425961820629e-07, + "loss": 0.2803, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -29.11459732055664, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -14.391775131225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.473959743976593, + "rewards_train/margins": 0.5855303406715393, + "rewards_train/rejected": -1.0594900846481323, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -86.7381820678711, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -117.18148803710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.223818302154541, + "rewards_train/margins": 1.744330644607544, + "rewards_train/rejected": -3.968148946762085, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -10.43635368347168, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -12.44981575012207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5467603802680969, + "rewards_train/margins": 0.282596230506897, + "rewards_train/rejected": -0.8293566107749939, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -63.08113098144531, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -58.59034729003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3581131100654602, + "rewards_train/margins": 0.7259216904640198, + "rewards_train/rejected": -1.08403480052948, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -6.685969829559326, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -7.752841472625732, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14359699189662933, + "rewards_train/margins": 0.11918716132640839, + "rewards_train/rejected": -0.2627841532230377, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -2.637545108795166, + "logps_train/ref_chosen": -0.6328125, + "logps_train/ref_rejected": -0.6328125, + "logps_train/rejected": -2.6915197372436523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20047326385974884, + "rewards_train/margins": 0.00539746880531311, + "rewards_train/rejected": -0.20587073266506195, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -145.1404266357422, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -190.2249755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7140426635742188, + "rewards_train/margins": 4.558455467224121, + "rewards_train/rejected": -8.27249813079834, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -13.679959297180176, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -41.573856353759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1508084535598755, + "rewards_train/margins": 2.5597022771835327, + "rewards_train/rejected": -3.710510730743408, + "step": 1790 + }, + { + "epoch": 0.5, + "logps_train/chosen": -15.846222877502441, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -1.640625, + "logps_train/rejected": -12.035114288330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3018097877502441, + "rewards_train/margins": -0.2623608112335205, + "rewards_train/rejected": -1.0394489765167236, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -31.256385803222656, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -41.0911865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8631386160850525, + "rewards_train/margins": 1.7959800362586975, + "rewards_train/rejected": -2.65911865234375, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -21.657241821289062, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -25.01988983154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.184474229812622, + "rewards_train/margins": 0.2175147533416748, + "rewards_train/rejected": -1.4019889831542969, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -13.238523483276367, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -40.49776077270508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09489765018224716, + "rewards_train/margins": 2.6821738705039024, + "rewards_train/rejected": -2.5872762203216553, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -46.12479782104492, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -84.38819885253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.11247980594635, + "rewards_train/margins": 0.8763401508331299, + "rewards_train/rejected": -1.98881995677948, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -45.248878479003906, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -18.010765075683594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8498878479003906, + "rewards_train/margins": -0.4113112688064575, + "rewards_train/rejected": -1.438576579093933, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -162.30032348632812, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -235.4798583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.230032444000244, + "rewards_train/margins": 6.917953968048096, + "rewards_train/rejected": -10.14798641204834, + "step": 1791 + }, + { + "epoch": 0.5, + "logps_train/chosen": -130.41018676757812, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -153.99847412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6410186886787415, + "rewards_train/margins": 5.658828914165497, + "rewards_train/rejected": -6.299847602844238, + "step": 1791 + }, + { + "epoch": 0.5, + "learning_rate": 4.074095227076446e-07, + "loss": 0.3678, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -240.07684326171875, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -282.44976806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.907684326171875, + "rewards_train/margins": 1.4372930526733398, + "rewards_train/rejected": -9.344977378845215, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -4.5223188400268555, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -4.919524192810059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13348188996315002, + "rewards_train/margins": 0.06784553825855255, + "rewards_train/rejected": -0.20132742822170258, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -115.40159606933594, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -151.82937622070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2401596307754517, + "rewards_train/margins": 3.542777895927429, + "rewards_train/rejected": -4.782937526702881, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -91.4578857421875, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -124.68280792236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.345788598060608, + "rewards_train/margins": 4.6724923849105835, + "rewards_train/rejected": -6.018280982971191, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -104.9721908569336, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -154.5216064453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9972190856933594, + "rewards_train/margins": 5.404941558837891, + "rewards_train/rejected": -6.40216064453125, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -113.59364318847656, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -182.78897094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.259364366531372, + "rewards_train/margins": 7.569533109664917, + "rewards_train/rejected": -8.828897476196289, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -62.45265197753906, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -85.11392211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4702652096748352, + "rewards_train/margins": 1.5411271452903748, + "rewards_train/rejected": -2.01139235496521, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -38.058616638183594, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -79.25987243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.068361759185791, + "rewards_train/margins": 3.3701257705688477, + "rewards_train/rejected": -5.438487529754639, + "step": 1792 + }, + { + "epoch": 0.5, + "logps_train/chosen": -5.4670562744140625, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -30.664600372314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13108062744140625, + "rewards_train/margins": 0.272879421710968, + "rewards_train/rejected": -0.40396004915237427, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -101.02376556396484, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -173.5825958251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0023765563964844, + "rewards_train/margins": 2.955883026123047, + "rewards_train/rejected": -3.9582595825195312, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -125.17508697509766, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -131.22885131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4675087928771973, + "rewards_train/margins": 0.25537633895874023, + "rewards_train/rejected": -3.7228851318359375, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -221.72349548339844, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -217.78866577148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.472350120544434, + "rewards_train/margins": -1.3934836387634277, + "rewards_train/rejected": -7.078866481781006, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -100.6448745727539, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -183.8351287841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.364487409591675, + "rewards_train/margins": 6.019026041030884, + "rewards_train/rejected": -8.383513450622559, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -19.781274795532227, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -79.78863525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0343774557113647, + "rewards_train/margins": 1.169486165046692, + "rewards_train/rejected": -2.2038636207580566, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.711751937866211, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -6.67240047454834, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0805501937866211, + "rewards_train/margins": -0.03831014409661293, + "rewards_train/rejected": -0.04224004969000816, + "step": 1793 + }, + { + "epoch": 0.5, + "logps_train/chosen": -101.69796752929688, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -127.80302429199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8197968006134033, + "rewards_train/margins": 0.9605057239532471, + "rewards_train/rejected": -2.7803025245666504, + "step": 1793 + }, + { + "epoch": 0.5, + "learning_rate": 4.0528059673827887e-07, + "loss": 0.3286, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -41.5726318359375, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -38.264373779296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.47601318359375, + "rewards_train/margins": -0.762075662612915, + "rewards_train/rejected": -2.713937520980835, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.775775909423828, + "logps_train/ref_chosen": -3.90625, + "logps_train/ref_rejected": -1.6796875, + "logps_train/rejected": -15.864897727966309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3869526386260986, + "rewards_train/margins": 0.03156840801239014, + "rewards_train/rejected": -1.4185210466384888, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -57.639190673828125, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -22.783447265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.151419162750244, + "rewards_train/margins": -2.3105744123458862, + "rewards_train/rejected": -0.8408447504043579, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -70.36653137207031, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -121.42123413085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7866532802581787, + "rewards_train/margins": 1.955470323562622, + "rewards_train/rejected": -5.742123603820801, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -19.252544403076172, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -50.35844802856445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4940044581890106, + "rewards_train/margins": 2.0418403446674347, + "rewards_train/rejected": -2.5358448028564453, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -87.54557037353516, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -142.04249572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8045570850372314, + "rewards_train/margins": 3.749692678451538, + "rewards_train/rejected": -5.5542497634887695, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.438526153564453, + "logps_train/ref_chosen": -0.41796875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -13.258935928344727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8020557761192322, + "rewards_train/margins": 0.10508781671524048, + "rewards_train/rejected": -0.9071435928344727, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -33.516212463378906, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -72.37294006347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2391213178634644, + "rewards_train/margins": 2.4231728315353394, + "rewards_train/rejected": -3.6622941493988037, + "step": 1794 + }, + { + "epoch": 0.5, + "logps_train/chosen": -135.1883087158203, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -186.59645080566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.568830966949463, + "rewards_train/margins": 4.940814018249512, + "rewards_train/rejected": -7.509644985198975, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -10.712879180908203, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -53.252410888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8369129300117493, + "rewards_train/margins": 3.0820781588554382, + "rewards_train/rejected": -3.9189910888671875, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -67.155029296875, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -179.0567169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4905030727386475, + "rewards_train/margins": 5.615168809890747, + "rewards_train/rejected": -8.105671882629395, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -1.218250036239624, + "logps_train/ref_chosen": -0.66015625, + "logps_train/ref_rejected": -1.0703125, + "logps_train/rejected": -4.334840297698975, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0558093786239624, + "rewards_train/margins": 0.270643413066864, + "rewards_train/rejected": -0.3264527916908264, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -25.570892333984375, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -49.89116668701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9758392572402954, + "rewards_train/margins": 3.1382774114608765, + "rewards_train/rejected": -4.114116668701172, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.215126037597656, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -32.89027404785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3449500799179077, + "rewards_train/margins": 1.2940772771835327, + "rewards_train/rejected": -2.6390273571014404, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -46.64603805541992, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -69.80228424072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.389603853225708, + "rewards_train/margins": 1.7656245231628418, + "rewards_train/rejected": -3.15522837638855, + "step": 1795 + }, + { + "epoch": 0.5, + "logps_train/chosen": -53.6492805480957, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -47.83217239379883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9649280309677124, + "rewards_train/margins": 2.4198518991470337, + "rewards_train/rejected": -4.384779930114746, + "step": 1795 + }, + { + "epoch": 0.5, + "learning_rate": 4.0315583317418967e-07, + "loss": 0.3989, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -100.34667205810547, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -99.564697265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4346672296524048, + "rewards_train/margins": -0.07819747924804688, + "rewards_train/rejected": -1.356469750404358, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -39.59028625488281, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -46.807518005371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3402786254882812, + "rewards_train/margins": 0.2654731273651123, + "rewards_train/rejected": -3.6057517528533936, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -25.27115821838379, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -39.40426254272461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.077115774154663, + "rewards_train/margins": -1.0991895198822021, + "rewards_train/rejected": -0.9779262542724609, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -6.7426557540893555, + "logps_train/ref_chosen": -1.953125, + "logps_train/ref_rejected": -0.333984375, + "logps_train/rejected": -7.873589038848877, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.478953093290329, + "rewards_train/margins": 0.2750073969364166, + "rewards_train/rejected": -0.7539604902267456, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -0.42963671684265137, + "logps_train/ref_chosen": -0.2041015625, + "logps_train/ref_rejected": -0.2041015625, + "logps_train/rejected": -0.4299216568470001, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.022553516551852226, + "rewards_train/margins": 2.849288284778595e-05, + "rewards_train/rejected": -0.022582009434700012, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.396219253540039, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -2.765625, + "logps_train/rejected": -3.6639628410339355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06662807613611221, + "rewards_train/margins": 0.15646186470985413, + "rewards_train/rejected": -0.08983378857374191, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -119.48794555664062, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -162.12045288085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6987946033477783, + "rewards_train/margins": 2.563250780105591, + "rewards_train/rejected": -6.262045383453369, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -91.94044494628906, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -126.22329711914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5440444946289062, + "rewards_train/margins": 2.528285503387451, + "rewards_train/rejected": -6.072329998016357, + "step": 1796 + }, + { + "epoch": 0.5, + "logps_train/chosen": -10.853229522705078, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -7.59375, + "logps_train/rejected": -37.369903564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6728229522705078, + "rewards_train/margins": 2.3047924041748047, + "rewards_train/rejected": -2.9776153564453125, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -46.526493072509766, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -88.5007095336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1776493787765503, + "rewards_train/margins": 1.5474215745925903, + "rewards_train/rejected": -2.7250709533691406, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -15.972569465637207, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -42.86375427246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8035069704055786, + "rewards_train/margins": 0.870368480682373, + "rewards_train/rejected": -1.6738754510879517, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -127.32721710205078, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -37.89772033691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.282721757888794, + "rewards_train/margins": -0.880449652671814, + "rewards_train/rejected": -1.40227210521698, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -19.573307037353516, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -40.207645416259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4729557037353516, + "rewards_train/margins": 2.1196839809417725, + "rewards_train/rejected": -3.592639684677124, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -5.887540817260742, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -1.8984375, + "logps_train/rejected": -6.557458877563477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2293790876865387, + "rewards_train/margins": 0.23652306199073792, + "rewards_train/rejected": -0.4659021496772766, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -61.01624298095703, + "logps_train/ref_chosen": -30.75, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -115.05746459960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0266244411468506, + "rewards_train/margins": -0.5708780288696289, + "rewards_train/rejected": -2.4557464122772217, + "step": 1797 + }, + { + "epoch": 0.5, + "logps_train/chosen": -9.71805477142334, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -9.13239574432373, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4530554711818695, + "rewards_train/margins": -0.5273158997297287, + "rewards_train/rejected": 0.07426042854785919, + "step": 1797 + }, + { + "epoch": 0.5, + "learning_rate": 4.0103524688647007e-07, + "loss": 0.581, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -5.930283546447754, + "logps_train/ref_chosen": -0.06005859375, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -13.538022994995117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5870224833488464, + "rewards_train/margins": -0.17697018384933472, + "rewards_train/rejected": -0.4100522994995117, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -161.61192321777344, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -168.99893188476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4611923694610596, + "rewards_train/margins": 0.23870086669921875, + "rewards_train/rejected": -2.6998932361602783, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -78.91522979736328, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -175.50045776367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.741523027420044, + "rewards_train/margins": 4.558522939682007, + "rewards_train/rejected": -6.300045967102051, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -33.5345458984375, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -51.91890335083008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1097047328948975, + "rewards_train/margins": 0.2071857452392578, + "rewards_train/rejected": -3.3168904781341553, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -221.9962158203125, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -235.77935791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.849621772766113, + "rewards_train/margins": 2.1283140182495117, + "rewards_train/rejected": -12.977935791015625, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -127.43438720703125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -192.3758544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.493438720703125, + "rewards_train/margins": 4.894146919250488, + "rewards_train/rejected": -7.387585639953613, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -175.512451171875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -238.62948608398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.6512451171875, + "rewards_train/margins": 0.8117036819458008, + "rewards_train/rejected": -5.462948799133301, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -34.08719253540039, + "logps_train/ref_chosen": -2.421875, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -22.43868064880371, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.166531801223755, + "rewards_train/margins": -1.1914137601852417, + "rewards_train/rejected": -1.9751180410385132, + "step": 1798 + }, + { + "epoch": 0.5, + "logps_train/chosen": -198.8033447265625, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -241.7252197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.680334568023682, + "rewards_train/margins": 2.7921876907348633, + "rewards_train/rejected": -7.472522258758545, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -18.314910888671875, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -28.812053680419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2877410650253296, + "rewards_train/margins": 0.4872143268585205, + "rewards_train/rejected": -1.77495539188385, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -204.09024047851562, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -165.16517639160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.409024238586426, + "rewards_train/margins": -0.4425063133239746, + "rewards_train/rejected": -4.966517925262451, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -272.870849609375, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -214.6053009033203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.687085151672363, + "rewards_train/margins": -2.1265549659729004, + "rewards_train/rejected": -6.560530185699463, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -0.08535979688167572, + "logps_train/ref_chosen": -0.37109375, + "logps_train/ref_rejected": -0.37109375, + "logps_train/rejected": -0.09716734290122986, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02857339382171631, + "rewards_train/margins": 0.0011807531118392944, + "rewards_train/rejected": 0.027392640709877014, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.906789779663086, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -4.8528008460998535, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.256303995847702, + "rewards_train/margins": -0.7647739350795746, + "rewards_train/rejected": 0.5084699392318726, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -6.087926387786865, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -16.89585304260254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21191763877868652, + "rewards_train/margins": 0.40266770124435425, + "rewards_train/rejected": -0.6145853400230408, + "step": 1799 + }, + { + "epoch": 0.5, + "logps_train/chosen": -47.85889434814453, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -49.6921501159668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.298389434814453, + "rewards_train/margins": 1.5895755290985107, + "rewards_train/rejected": -3.887964963912964, + "step": 1799 + }, + { + "epoch": 0.5, + "learning_rate": 3.989188527169749e-07, + "loss": 0.6354, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.434779167175293, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -32.67798614501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6950404047966003, + "rewards_train/margins": 0.6352582573890686, + "rewards_train/rejected": -1.330298662185669, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -156.7738037109375, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -206.61373901367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.827380657196045, + "rewards_train/margins": 3.583993434906006, + "rewards_train/rejected": -8.41137409210205, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -33.14157485961914, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -24.967960357666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4641574621200562, + "rewards_train/margins": 0.6920136213302612, + "rewards_train/rejected": -2.1561710834503174, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -151.26052856445312, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -221.57498168945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.026052951812744, + "rewards_train/margins": 4.531445503234863, + "rewards_train/rejected": -6.557498455047607, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -42.20188903808594, + "logps_train/ref_chosen": -27.25, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -156.0335693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4951889514923096, + "rewards_train/margins": 7.108168363571167, + "rewards_train/rejected": -8.603357315063477, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -66.67151641845703, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -143.53790283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.542151689529419, + "rewards_train/margins": 1.0116386413574219, + "rewards_train/rejected": -2.553790330886841, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -63.91494369506836, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -182.6392364501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8414943814277649, + "rewards_train/margins": 4.72242945432663, + "rewards_train/rejected": -5.5639238357543945, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -150.9127197265625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -226.80039978027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.541271924972534, + "rewards_train/margins": 5.438768625259399, + "rewards_train/rejected": -8.980040550231934, + "step": 1800 + }, + { + "epoch": 0.5, + "logps_train/chosen": -165.79653930664062, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -141.01417541503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.97965407371521, + "rewards_train/margins": -0.9282364845275879, + "rewards_train/rejected": -2.051417589187622, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -133.62301635742188, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -165.8795166015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6623016595840454, + "rewards_train/margins": 0.0256500244140625, + "rewards_train/rejected": -1.687951683998108, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -5.108272552490234, + "logps_train/ref_chosen": -0.71484375, + "logps_train/ref_rejected": -2.015625, + "logps_train/rejected": -3.967161178588867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4393428862094879, + "rewards_train/margins": -0.24418926239013672, + "rewards_train/rejected": -0.1951536238193512, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -0.000593181699514389, + "logps_train/ref_chosen": -0.02197265625, + "logps_train/ref_rejected": -0.02197265625, + "logps_train/rejected": -0.0005589832435362041, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0021379475947469473, + "rewards_train/margins": -3.419816493988037e-06, + "rewards_train/rejected": 0.0021413674112409353, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -21.16436004638672, + "logps_train/ref_chosen": -0.51171875, + "logps_train/ref_rejected": -0.51171875, + "logps_train/rejected": -20.7706356048584, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0652642250061035, + "rewards_train/margins": -0.03937244415283203, + "rewards_train/rejected": -2.0258917808532715, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -26.154212951660156, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -43.06110763549805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1310462951660156, + "rewards_train/margins": 1.5594394207000732, + "rewards_train/rejected": -3.690485715866089, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -180.78709411621094, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -251.63621520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.378709316253662, + "rewards_train/margins": 4.0849127769470215, + "rewards_train/rejected": -8.463622093200684, + "step": 1801 + }, + { + "epoch": 0.5, + "logps_train/chosen": -101.98297119140625, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -217.48626708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9982972145080566, + "rewards_train/margins": 7.300329685211182, + "rewards_train/rejected": -10.298626899719238, + "step": 1801 + }, + { + "epoch": 0.5, + "learning_rate": 3.9680666547821997e-07, + "loss": 0.3486, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -175.48391723632812, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -237.04568481445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.198391914367676, + "rewards_train/margins": 1.506176471710205, + "rewards_train/rejected": -6.704568386077881, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -34.269500732421875, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -119.67110443115234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9207000732421875, + "rewards_train/margins": -0.9535896182060242, + "rewards_train/rejected": -0.9671104550361633, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -40.242225646972656, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -35.09730529785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2117226123809814, + "rewards_train/margins": -0.5644920468330383, + "rewards_train/rejected": -0.6472305655479431, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -8.938218116760254, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -14.807633399963379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5313218235969543, + "rewards_train/margins": 0.2650665044784546, + "rewards_train/rejected": -0.7963883280754089, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.64392852783203, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -18.807392120361328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2800178527832031, + "rewards_train/margins": -0.1367785930633545, + "rewards_train/rejected": -1.1432392597198486, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -13.6143217086792, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -36.6115837097168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9239321947097778, + "rewards_train/margins": 1.4684761762619019, + "rewards_train/rejected": -2.3924083709716797, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -37.341552734375, + "logps_train/ref_chosen": -1.734375, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -44.267669677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.560717821121216, + "rewards_train/margins": 0.2910492420196533, + "rewards_train/rejected": -3.851767063140869, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -9.738157272338867, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -12.378801345825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4175657331943512, + "rewards_train/margins": 0.27031442523002625, + "rewards_train/rejected": -0.6878801584243774, + "step": 1802 + }, + { + "epoch": 0.5, + "logps_train/chosen": -50.18921661376953, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -46.857215881347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1439217329025269, + "rewards_train/margins": 1.9417999982833862, + "rewards_train/rejected": -3.085721731185913, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -197.03118896484375, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -202.427734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.703118801116943, + "rewards_train/margins": -0.260345458984375, + "rewards_train/rejected": -5.442773342132568, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -27.326366424560547, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -33.782867431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0826367139816284, + "rewards_train/margins": 0.9394000768661499, + "rewards_train/rejected": -2.0220367908477783, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -23.244979858398438, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -0.005462646484375, + "logps_train/rejected": -3.168168783187866, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3369979858398438, + "rewards_train/margins": -1.0207273662090302, + "rewards_train/rejected": -0.3162706196308136, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -35.188316345214844, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -24.30663299560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3688316345214844, + "rewards_train/margins": 0.08683165907859802, + "rewards_train/rejected": -0.4556632936000824, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -7.282549858093262, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -15.923794746398926, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.33294248580932617, + "rewards_train/margins": -0.40306301414966583, + "rewards_train/rejected": 0.07012052834033966, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -137.43923950195312, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -158.66465759277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8439239263534546, + "rewards_train/margins": 1.1225417852401733, + "rewards_train/rejected": -2.966465711593628, + "step": 1803 + }, + { + "epoch": 0.5, + "logps_train/chosen": -115.89476776123047, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -225.2092742919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2394769191741943, + "rewards_train/margins": 8.031450510025024, + "rewards_train/rejected": -11.270927429199219, + "step": 1803 + }, + { + "epoch": 0.5, + "learning_rate": 3.9469869995327687e-07, + "loss": 0.6023, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -28.84573745727539, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -33.51325607299805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.340823769569397, + "rewards_train/margins": 1.1855019330978394, + "rewards_train/rejected": -2.5263257026672363, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -9.527995109558105, + "logps_train/ref_chosen": -6.59375, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -14.33862018585205, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.293424516916275, + "rewards_train/margins": 0.85606250166893, + "rewards_train/rejected": -1.149487018585205, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -26.69115447998047, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -73.67701721191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.419115424156189, + "rewards_train/margins": 2.7860862016677856, + "rewards_train/rejected": -4.205201625823975, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -29.815296173095703, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -47.875083923339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2190296649932861, + "rewards_train/margins": 1.1059787273406982, + "rewards_train/rejected": -2.3250083923339844, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -11.624703407287598, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -45.80331039428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06252966076135635, + "rewards_train/margins": 3.405360795557499, + "rewards_train/rejected": -3.3428311347961426, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -173.88565063476562, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -240.64303588867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.088565349578857, + "rewards_train/margins": 6.27573823928833, + "rewards_train/rejected": -11.364303588867188, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -64.7691421508789, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -40.25172424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9519143104553223, + "rewards_train/margins": 0.16075825691223145, + "rewards_train/rejected": -3.1126725673675537, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -79.09919738769531, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -85.21263122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9099197387695312, + "rewards_train/margins": 0.9613434076309204, + "rewards_train/rejected": -1.8712631464004517, + "step": 1804 + }, + { + "epoch": 0.5, + "logps_train/chosen": -13.378092765808105, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -35.96870422363281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7034342885017395, + "rewards_train/margins": 2.2496861815452576, + "rewards_train/rejected": -2.953120470046997, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -13.857039451599121, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -16.443082809448242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04820394515991211, + "rewards_train/margins": 0.15860433876514435, + "rewards_train/rejected": -0.20680828392505646, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -184.8822021484375, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -158.47943115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.588220119476318, + "rewards_train/margins": 0.40972328186035156, + "rewards_train/rejected": -4.99794340133667, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -122.28898620605469, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -138.7493438720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7788987159729004, + "rewards_train/margins": 0.946035623550415, + "rewards_train/rejected": -3.7249343395233154, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -80.08414459228516, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -187.46615600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3584144711494446, + "rewards_train/margins": 3.188201129436493, + "rewards_train/rejected": -3.5466156005859375, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -190.49154663085938, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -227.95404052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.549154758453369, + "rewards_train/margins": 6.346249103546143, + "rewards_train/rejected": -8.895403861999512, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -39.01335144042969, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -53.29725646972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4388351440429688, + "rewards_train/margins": 1.090890645980835, + "rewards_train/rejected": -2.5297257900238037, + "step": 1805 + }, + { + "epoch": 0.5, + "logps_train/chosen": -96.03721618652344, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -152.5558319091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6537216901779175, + "rewards_train/margins": 1.101861596107483, + "rewards_train/rejected": -2.7555832862854004, + "step": 1805 + }, + { + "epoch": 0.5, + "learning_rate": 3.9259497089566886e-07, + "loss": 0.2571, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -38.144535064697266, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -23.73157501220703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.876953601837158, + "rewards_train/margins": -1.2006710767745972, + "rewards_train/rejected": -1.676282525062561, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -48.28029251098633, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -38.83248519897461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3155293464660645, + "rewards_train/margins": -0.782280683517456, + "rewards_train/rejected": -2.5332486629486084, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -12.875078201293945, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -33.62598419189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21874217689037323, + "rewards_train/margins": 2.2875905483961105, + "rewards_train/rejected": -2.0688483715057373, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -66.26397705078125, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -117.48049926757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7013977766036987, + "rewards_train/margins": 1.896652102470398, + "rewards_train/rejected": -3.5980498790740967, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -112.6201171875, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -111.4337158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2120118141174316, + "rewards_train/margins": 0.33135986328125, + "rewards_train/rejected": -2.5433716773986816, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -320.2318115234375, + "logps_train/ref_chosen": -254.0, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -371.3980712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.623181343078613, + "rewards_train/margins": 10.216626167297363, + "rewards_train/rejected": -16.839807510375977, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -17.202682495117188, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -30.160966873168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2796432971954346, + "rewards_train/margins": 0.9302034378051758, + "rewards_train/rejected": -2.2098467350006104, + "step": 1806 + }, + { + "epoch": 0.5, + "logps_train/chosen": -135.32415771484375, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -156.8227996826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4324158430099487, + "rewards_train/margins": 2.2498642206192017, + "rewards_train/rejected": -3.6822800636291504, + "step": 1806 + }, + { + "epoch": 0.51, + "logps_train/chosen": -7.148680210113525, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -1.8984375, + "logps_train/rejected": -32.06058120727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18674302101135254, + "rewards_train/margins": 2.8294713497161865, + "rewards_train/rejected": -3.016214370727539, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -109.05458068847656, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -158.11087036132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.930458068847656, + "rewards_train/margins": 1.9806289672851562, + "rewards_train/rejected": -6.9110870361328125, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -21.020856857299805, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -45.823123931884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2458356618881226, + "rewards_train/margins": 1.8677266836166382, + "rewards_train/rejected": -3.1135623455047607, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -89.40064239501953, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -56.26824951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.390064239501953, + "rewards_train/margins": 0.036760807037353516, + "rewards_train/rejected": -2.4268250465393066, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -137.88168334960938, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -185.322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1881684064865112, + "rewards_train/margins": 4.044058442115784, + "rewards_train/rejected": -5.232226848602295, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -163.15530395507812, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -217.88619995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0155303478240967, + "rewards_train/margins": 3.9730899333953857, + "rewards_train/rejected": -5.988620281219482, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -136.4310760498047, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -196.25192260742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4431076049804688, + "rewards_train/margins": 7.082084655761719, + "rewards_train/rejected": -10.525192260742188, + "step": 1807 + }, + { + "epoch": 0.51, + "logps_train/chosen": -34.0075569152832, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -36.253299713134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.519505739212036, + "rewards_train/margins": 0.35582423210144043, + "rewards_train/rejected": -2.8753299713134766, + "step": 1807 + }, + { + "epoch": 0.51, + "learning_rate": 3.9049549302926887e-07, + "loss": 0.3379, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -151.43212890625, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -186.7161102294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.493212938308716, + "rewards_train/margins": 3.728398084640503, + "rewards_train/rejected": -6.221611022949219, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -32.22840881347656, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -20.292152404785156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6853408813476562, + "rewards_train/margins": -0.2686256468296051, + "rewards_train/rejected": -0.41671523451805115, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -161.41845703125, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -220.15585327148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5418457388877869, + "rewards_train/margins": 4.573739588260651, + "rewards_train/rejected": -5.1155853271484375, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -223.54486083984375, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -197.5126953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1544861793518066, + "rewards_train/margins": -0.5032166242599487, + "rewards_train/rejected": -1.651269555091858, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -21.379640579223633, + "logps_train/ref_chosen": -0.86328125, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -20.615747451782227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.051635980606079, + "rewards_train/margins": -1.1713111996650696, + "rewards_train/rejected": -0.8803247809410095, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -14.768543243408203, + "logps_train/ref_chosen": -1.8828125, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -39.87328338623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.288573145866394, + "rewards_train/margins": 1.7175053358078003, + "rewards_train/rejected": -3.0060784816741943, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -128.46676635742188, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -128.50338745117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.9533233642578125, + "rewards_train/margins": 0.003662109375, + "rewards_train/rejected": 0.9496612548828125, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -39.735198974609375, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -19.474170684814453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9985198974609375, + "rewards_train/margins": -0.03235280513763428, + "rewards_train/rejected": -0.9661670923233032, + "step": 1808 + }, + { + "epoch": 0.51, + "logps_train/chosen": -251.56845092773438, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -242.90504455566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.356844902038574, + "rewards_train/margins": 0.683659553527832, + "rewards_train/rejected": -13.040504455566406, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.500194549560547, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -35.302467346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27501946687698364, + "rewards_train/margins": 1.692727267742157, + "rewards_train/rejected": -1.9677467346191406, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -25.44225311279297, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -37.96253204345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2004753351211548, + "rewards_train/margins": 0.9207779169082642, + "rewards_train/rejected": -2.121253252029419, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -148.1083221435547, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -123.48452758789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.460832118988037, + "rewards_train/margins": -0.8623790740966797, + "rewards_train/rejected": -4.598453044891357, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -146.82412719726562, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -169.17800903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5824127197265625, + "rewards_train/margins": 1.285388469696045, + "rewards_train/rejected": -4.867801189422607, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -9.963253021240234, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -5.261512279510498, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5682002902030945, + "rewards_train/margins": -0.2607990503311157, + "rewards_train/rejected": -0.30740123987197876, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -3.204340934753418, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -11.031390190124512, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11574659496545792, + "rewards_train/margins": 0.5061424598097801, + "rewards_train/rejected": -0.621889054775238, + "step": 1809 + }, + { + "epoch": 0.51, + "logps_train/chosen": -50.86322784423828, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -65.40647888183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.742572784423828, + "rewards_train/margins": 0.5980753898620605, + "rewards_train/rejected": -4.340648174285889, + "step": 1809 + }, + { + "epoch": 0.51, + "learning_rate": 3.884002810481958e-07, + "loss": 0.5602, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -234.18809509277344, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -254.6619415283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.018809795379639, + "rewards_train/margins": 0.7473845481872559, + "rewards_train/rejected": -6.7661943435668945, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -134.85707092285156, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -215.58847045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.685707092285156, + "rewards_train/margins": 4.473139762878418, + "rewards_train/rejected": -9.158846855163574, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -9.910482406616211, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -26.239351272583008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20354823768138885, + "rewards_train/margins": 0.4453869014978409, + "rewards_train/rejected": -0.6489351391792297, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -149.5586700439453, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -106.43790435791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6558670401573181, + "rewards_train/margins": 0.18792343139648438, + "rewards_train/rejected": -0.8437904715538025, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -150.87136840820312, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -243.34872436523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.887136936187744, + "rewards_train/margins": 7.797735691070557, + "rewards_train/rejected": -12.6848726272583, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -26.914440155029297, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -26.807058334350586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7226940393447876, + "rewards_train/margins": -0.716988205909729, + "rewards_train/rejected": -1.0057058334350586, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.14810562133789, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -63.07545471191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7210606336593628, + "rewards_train/margins": -1.4135151505470276, + "rewards_train/rejected": -0.3075454831123352, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -85.00626373291016, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -136.92489624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15062637627124786, + "rewards_train/margins": 3.4418633431196213, + "rewards_train/rejected": -3.592489719390869, + "step": 1810 + }, + { + "epoch": 0.51, + "logps_train/chosen": -54.02889633178711, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -35.232078552246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0278897285461426, + "rewards_train/margins": 0.9453182220458984, + "rewards_train/rejected": -2.973207950592041, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -84.78394317626953, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -97.64340209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4283943176269531, + "rewards_train/margins": 2.0359458923339844, + "rewards_train/rejected": -3.4643402099609375, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -12.676942825317383, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -38.41478729248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6333193182945251, + "rewards_train/margins": 1.6144095063209534, + "rewards_train/rejected": -2.2477288246154785, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -135.48239135742188, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -164.78265380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.748239278793335, + "rewards_train/margins": 4.330026388168335, + "rewards_train/rejected": -7.07826566696167, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -102.73145294189453, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -163.82028198242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7231453657150269, + "rewards_train/margins": 2.5088831186294556, + "rewards_train/rejected": -4.232028484344482, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -201.7635498046875, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -221.8316650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.276355266571045, + "rewards_train/margins": 1.1068115234375, + "rewards_train/rejected": -6.383166790008545, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -34.1878662109375, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -1.78125, + "logps_train/rejected": -18.818323135375977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6437866687774658, + "rewards_train/margins": 0.059920668601989746, + "rewards_train/rejected": -1.7037073373794556, + "step": 1811 + }, + { + "epoch": 0.51, + "logps_train/chosen": -9.747062683105469, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -30.534761428833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030956268310546875, + "rewards_train/margins": 1.347519874572754, + "rewards_train/rejected": -1.3784761428833008, + "step": 1811 + }, + { + "epoch": 0.51, + "learning_rate": 3.8630934961671234e-07, + "loss": 0.3858, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -76.83192443847656, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -166.4832000732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.40819251537323, + "rewards_train/margins": 5.390127778053284, + "rewards_train/rejected": -6.798320293426514, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -168.90740966796875, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -248.78973388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.490741014480591, + "rewards_train/margins": 4.188232660293579, + "rewards_train/rejected": -7.67897367477417, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -44.63441848754883, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -112.62309265136719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.200941801071167, + "rewards_train/margins": 3.911367654800415, + "rewards_train/rejected": -6.112309455871582, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -92.29571533203125, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -80.9208984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4295716285705566, + "rewards_train/margins": -0.7374817132949829, + "rewards_train/rejected": -1.6920899152755737, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -109.08697509765625, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -105.39924621582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5586975812911987, + "rewards_train/margins": -1.2687729597091675, + "rewards_train/rejected": -0.28992462158203125, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -161.86175537109375, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -173.61651611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.086175441741943, + "rewards_train/margins": 0.07547616958618164, + "rewards_train/rejected": -6.161651611328125, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -19.098033905029297, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -10.36693000793457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7473034262657166, + "rewards_train/margins": -0.24811041355133057, + "rewards_train/rejected": -0.499193012714386, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -120.60527801513672, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -150.23687744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46052780747413635, + "rewards_train/margins": 5.113160222768784, + "rewards_train/rejected": -5.57368803024292, + "step": 1812 + }, + { + "epoch": 0.51, + "logps_train/chosen": -90.33497619628906, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -90.04072570800781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6834976077079773, + "rewards_train/margins": -0.02942502498626709, + "rewards_train/rejected": -0.6540725827217102, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -2.639526605606079, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -0.498046875, + "logps_train/rejected": -2.751424789428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08604734390974045, + "rewards_train/margins": 0.3113851323723793, + "rewards_train/rejected": -0.22533778846263885, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -76.90345001220703, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -65.23069763183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.990345001220703, + "rewards_train/margins": 0.6702249050140381, + "rewards_train/rejected": -3.660569906234741, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -54.676231384277344, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -56.81745147705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5176231861114502, + "rewards_train/margins": 0.7141220569610596, + "rewards_train/rejected": -2.2317452430725098, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -5.316608428955078, + "logps_train/ref_chosen": -0.353515625, + "logps_train/ref_rejected": -0.353515625, + "logps_train/rejected": -4.997742176055908, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4963092803955078, + "rewards_train/margins": -0.03188660740852356, + "rewards_train/rejected": -0.46442267298698425, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -195.3111572265625, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -258.74945068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.031115770339966, + "rewards_train/margins": 7.043829679489136, + "rewards_train/rejected": -9.074945449829102, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -28.676828384399414, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -31.304302215576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4301828145980835, + "rewards_train/margins": 1.1846224069595337, + "rewards_train/rejected": -2.614805221557617, + "step": 1813 + }, + { + "epoch": 0.51, + "logps_train/chosen": -89.58101654052734, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -231.70066833496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6081016063690186, + "rewards_train/margins": 6.261965036392212, + "rewards_train/rejected": -8.87006664276123, + "step": 1813 + }, + { + "epoch": 0.51, + "learning_rate": 3.8422271336912195e-07, + "loss": 0.4512, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -20.9239559173584, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -18.94593620300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4548956155776978, + "rewards_train/margins": 0.17094802856445312, + "rewards_train/rejected": -1.6258436441421509, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -85.98907470703125, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -200.30484008789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2989075183868408, + "rewards_train/margins": 7.931576490402222, + "rewards_train/rejected": -9.230484008789062, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -75.114013671875, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -88.5711669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16140137612819672, + "rewards_train/margins": 0.34571535885334015, + "rewards_train/rejected": -0.5071167349815369, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -11.691651344299316, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -33.274986267089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5379151701927185, + "rewards_train/margins": 2.5208335518836975, + "rewards_train/rejected": -3.058748722076416, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -176.95077514648438, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -264.57879638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4950774908065796, + "rewards_train/margins": 7.362802147865295, + "rewards_train/rejected": -8.857879638671875, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -132.44036865234375, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -181.64608764648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6440370082855225, + "rewards_train/margins": 0.5205719470977783, + "rewards_train/rejected": -4.164608955383301, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -170.67678833007812, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -215.27671813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.76767897605896, + "rewards_train/margins": 1.559993028640747, + "rewards_train/rejected": -4.327672004699707, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -7.711706638336182, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -1.4765625, + "logps_train/rejected": -7.228843688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37742066383361816, + "rewards_train/margins": 0.19780749082565308, + "rewards_train/rejected": -0.5752281546592712, + "step": 1814 + }, + { + "epoch": 0.51, + "logps_train/chosen": -104.51110076904297, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -115.77919006347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34888991713523865, + "rewards_train/margins": 0.6268089413642883, + "rewards_train/rejected": -0.2779190242290497, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -39.59944152832031, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -43.522216796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4099442958831787, + "rewards_train/margins": -0.2577226161956787, + "rewards_train/rejected": -2.1522216796875, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -166.235107421875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -180.67022705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3235108852386475, + "rewards_train/margins": 1.3435118198394775, + "rewards_train/rejected": -4.667022705078125, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -29.241928100585938, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -1.9140625, + "logps_train/rejected": -13.703015327453613, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0491927862167358, + "rewards_train/margins": 0.12970256805419922, + "rewards_train/rejected": -1.178895354270935, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -205.767578125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -250.61032104492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.376757621765137, + "rewards_train/margins": 1.184274673461914, + "rewards_train/rejected": -9.56103229522705, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -171.21209716796875, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -188.61917114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.621209621429443, + "rewards_train/margins": 1.7407073974609375, + "rewards_train/rejected": -7.361917018890381, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -84.36985778808594, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -99.90676879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0369858741760254, + "rewards_train/margins": 0.35369110107421875, + "rewards_train/rejected": -2.390676975250244, + "step": 1815 + }, + { + "epoch": 0.51, + "logps_train/chosen": -22.16644287109375, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -20.933855056762695, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.091644287109375, + "rewards_train/margins": -0.010758757591247559, + "rewards_train/rejected": -1.0808855295181274, + "step": 1815 + }, + { + "epoch": 0.51, + "learning_rate": 3.8214038690966577e-07, + "loss": 0.3911, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -100.08724975585938, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -196.72962951660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9087249636650085, + "rewards_train/margins": 7.264238178730011, + "rewards_train/rejected": -8.17296314239502, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -187.45001220703125, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -256.76312255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.245001316070557, + "rewards_train/margins": 3.731311321258545, + "rewards_train/rejected": -9.976312637329102, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -83.86163330078125, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -72.06043243408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8861633539199829, + "rewards_train/margins": 0.11987996101379395, + "rewards_train/rejected": -1.0060433149337769, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -147.46627807617188, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -181.933837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1966278553009033, + "rewards_train/margins": 4.346756219863892, + "rewards_train/rejected": -7.543384075164795, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -97.37962341308594, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -129.1175079345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6379623413085938, + "rewards_train/margins": 0.7237884998321533, + "rewards_train/rejected": -1.361750841140747, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -5.330482006072998, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -7.687077045440674, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020076800137758255, + "rewards_train/margins": 0.22940950468182564, + "rewards_train/rejected": -0.20933270454406738, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -38.756927490234375, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -72.77306365966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6881927251815796, + "rewards_train/margins": 0.5141137838363647, + "rewards_train/rejected": -2.2023065090179443, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -7.274646759033203, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -9.549161911010742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05691032484173775, + "rewards_train/margins": 0.32745151594281197, + "rewards_train/rejected": -0.2705411911010742, + "step": 1816 + }, + { + "epoch": 0.51, + "logps_train/chosen": -144.77117919921875, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -146.71575927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.077117919921875, + "rewards_train/margins": 1.644458293914795, + "rewards_train/rejected": -5.72157621383667, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -102.03279876708984, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -122.22296142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2532799243927, + "rewards_train/margins": 0.369016170501709, + "rewards_train/rejected": -3.622296094894409, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -128.6571044921875, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -184.06097412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.16571044921875, + "rewards_train/margins": 3.8903870582580566, + "rewards_train/rejected": -6.056097507476807, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -33.94296646118164, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -40.124366760253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4817967414855957, + "rewards_train/margins": 0.4993898868560791, + "rewards_train/rejected": -2.981186628341675, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -12.25965690612793, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -13.422819137573242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9728407263755798, + "rewards_train/margins": -0.12743377685546875, + "rewards_train/rejected": -0.8454069495201111, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -54.61314392089844, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -59.01874542236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2863143980503082, + "rewards_train/margins": 0.09056013822555542, + "rewards_train/rejected": -0.37687453627586365, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -190.2115478515625, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -197.2261962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.171154975891113, + "rewards_train/margins": -1.24853515625, + "rewards_train/rejected": -6.922619819641113, + "step": 1817 + }, + { + "epoch": 0.51, + "logps_train/chosen": -103.43116760253906, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -160.60545349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8931167125701904, + "rewards_train/margins": 4.317428827285767, + "rewards_train/rejected": -7.210545539855957, + "step": 1817 + }, + { + "epoch": 0.51, + "learning_rate": 3.800623848124209e-07, + "loss": 0.4239, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -133.44117736816406, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -241.96385192871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5441176891326904, + "rewards_train/margins": 4.55226731300354, + "rewards_train/rejected": -8.09638500213623, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -5.162689208984375, + "logps_train/ref_chosen": -1.203125, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -10.163239479064941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.395956426858902, + "rewards_train/margins": 0.17661753296852112, + "rewards_train/rejected": -0.5725739598274231, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -87.55517578125, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -122.34951782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.080517768859863, + "rewards_train/margins": 1.9544339179992676, + "rewards_train/rejected": -6.034951686859131, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -6.382111072540283, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -1.6015625, + "logps_train/rejected": -19.000415802001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13053889572620392, + "rewards_train/margins": 1.8704242259263992, + "rewards_train/rejected": -1.7398853302001953, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -12.88814926147461, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -33.56809997558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9622524380683899, + "rewards_train/margins": 1.7914325594902039, + "rewards_train/rejected": -2.7536849975585938, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -190.3024444580078, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -187.88250732421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.7302446365356445, + "rewards_train/margins": -1.1419939994812012, + "rewards_train/rejected": -4.588250637054443, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -62.746665954589844, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -72.66239929199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6996666193008423, + "rewards_train/margins": 2.116573452949524, + "rewards_train/rejected": -3.816240072250366, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -70.1792221069336, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -73.76455688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4429222345352173, + "rewards_train/margins": 0.23353350162506104, + "rewards_train/rejected": -1.6764557361602783, + "step": 1818 + }, + { + "epoch": 0.51, + "logps_train/chosen": -123.19412994384766, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -233.4295196533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.619413137435913, + "rewards_train/margins": 10.023538827896118, + "rewards_train/rejected": -12.642951965332031, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -219.16860961914062, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -207.21966552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.316861152648926, + "rewards_train/margins": 1.3051056861877441, + "rewards_train/rejected": -6.62196683883667, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -1.799546718597412, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -7.990363121032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02317032776772976, + "rewards_train/margins": 0.022206639871001244, + "rewards_train/rejected": 0.0009636878967285156, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -93.78360748291016, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -109.5322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6283607482910156, + "rewards_train/margins": 3.7998619079589844, + "rewards_train/rejected": -5.42822265625, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -2.088866949081421, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -24.855083465576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.26923832297325134, + "rewards_train/margins": 2.485996812582016, + "rewards_train/rejected": -2.2167584896087646, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -188.2967071533203, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -230.07525634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.8296709060668945, + "rewards_train/margins": 5.4778547286987305, + "rewards_train/rejected": -10.307525634765625, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -2.479039192199707, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -12.075411796569824, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038033582270145416, + "rewards_train/margins": 0.8596372976899147, + "rewards_train/rejected": -0.8216037154197693, + "step": 1819 + }, + { + "epoch": 0.51, + "logps_train/chosen": -107.44798278808594, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -14.017938613891602, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8447983264923096, + "rewards_train/margins": -1.7414419651031494, + "rewards_train/rejected": -1.1033563613891602, + "step": 1819 + }, + { + "epoch": 0.51, + "learning_rate": 3.7798872162119944e-07, + "loss": 0.4032, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -6.903633117675781, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -20.170726776123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12161331623792648, + "rewards_train/margins": 0.4267093613743782, + "rewards_train/rejected": -0.5483226776123047, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -81.71087646484375, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -138.22216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47108766436576843, + "rewards_train/margins": 5.551129132509232, + "rewards_train/rejected": -6.022216796875, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -5.673531532287598, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -24.90219497680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24391566216945648, + "rewards_train/margins": 0.9900539070367813, + "rewards_train/rejected": -1.2339695692062378, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -22.444557189941406, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -57.49056625366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2507057189941406, + "rewards_train/margins": 1.8858509063720703, + "rewards_train/rejected": -3.136556625366211, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -153.74636840820312, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -139.29608154296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9746367931365967, + "rewards_train/margins": 0.9049713611602783, + "rewards_train/rejected": -4.879608154296875, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -133.89987182617188, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -212.59429931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7899872064590454, + "rewards_train/margins": 8.669442534446716, + "rewards_train/rejected": -9.459429740905762, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -123.94136047363281, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -185.54354858398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5941359996795654, + "rewards_train/margins": 2.9102189540863037, + "rewards_train/rejected": -6.504354953765869, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -70.46981811523438, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -70.55744934082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25301820039749146, + "rewards_train/margins": 0.008763134479522705, + "rewards_train/rejected": 0.24425506591796875, + "step": 1820 + }, + { + "epoch": 0.51, + "logps_train/chosen": -122.06038665771484, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -203.9168701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8060386776924133, + "rewards_train/margins": 8.735648334026337, + "rewards_train/rejected": -9.54168701171875, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -71.0147476196289, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -132.34483337402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6014747619628906, + "rewards_train/margins": 0.033008575439453125, + "rewards_train/rejected": -0.6344833374023438, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -30.64324378967285, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -46.889774322509766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.451824426651001, + "rewards_train/margins": -0.8878470063209534, + "rewards_train/rejected": -0.5639774203300476, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -118.55335998535156, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -143.50341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6053359508514404, + "rewards_train/margins": 0.24500584602355957, + "rewards_train/rejected": -3.850341796875, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -116.59501647949219, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -175.31744384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.159501552581787, + "rewards_train/margins": 2.0722427368164062, + "rewards_train/rejected": -7.231744289398193, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.91541862487793, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -17.951309204101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4477918744087219, + "rewards_train/margins": 0.9285890460014343, + "rewards_train/rejected": -1.3763809204101562, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -205.8865509033203, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -221.20574951171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.688655376434326, + "rewards_train/margins": -0.2680802345275879, + "rewards_train/rejected": -6.420575141906738, + "step": 1821 + }, + { + "epoch": 0.51, + "logps_train/chosen": -39.0235710144043, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -34.401702880859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4961071014404297, + "rewards_train/margins": -0.09968686103820801, + "rewards_train/rejected": -2.3964202404022217, + "step": 1821 + }, + { + "epoch": 0.51, + "learning_rate": 3.7591941184944485e-07, + "loss": 0.4103, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -20.433622360229492, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -1.75, + "logps_train/rejected": -15.811586380004883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6621122360229492, + "rewards_train/margins": 0.7440464496612549, + "rewards_train/rejected": -1.406158685684204, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -36.722412109375, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -45.54899978637695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.522241234779358, + "rewards_train/margins": 1.3326588869094849, + "rewards_train/rejected": -2.8549001216888428, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -29.61888313293457, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -54.5, + "logps_train/rejected": -83.51988220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1900134086608887, + "rewards_train/margins": 0.7119748592376709, + "rewards_train/rejected": -2.9019882678985596, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -176.43714904785156, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -184.16537475585938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.19371509552002, + "rewards_train/margins": -0.2771773338317871, + "rewards_train/rejected": -7.916537761688232, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -164.9953155517578, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -79.04158020019531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.399531602859497, + "rewards_train/margins": -0.9203734397888184, + "rewards_train/rejected": -2.4791581630706787, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -9.930232048034668, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -24.548166275024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7789607048034668, + "rewards_train/margins": 0.13835591077804565, + "rewards_train/rejected": -0.9173166155815125, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -126.84249877929688, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -245.01153564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2342498302459717, + "rewards_train/margins": 6.36690354347229, + "rewards_train/rejected": -9.601153373718262, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -57.707096099853516, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -40.268096923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.195709705352783, + "rewards_train/margins": 0.3748500347137451, + "rewards_train/rejected": -2.5705597400665283, + "step": 1822 + }, + { + "epoch": 0.51, + "logps_train/chosen": -86.87820434570312, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -137.04629516601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4878205060958862, + "rewards_train/margins": 1.266809105873108, + "rewards_train/rejected": -2.754629611968994, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -126.91500854492188, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -210.9019317626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.2415008544921875, + "rewards_train/margins": 6.848692893981934, + "rewards_train/rejected": -11.090193748474121, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -202.01739501953125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -229.67184448242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.301739692687988, + "rewards_train/margins": 0.7654447555541992, + "rewards_train/rejected": -7.0671844482421875, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -38.39337158203125, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -35.939796447753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2018373012542725, + "rewards_train/margins": 0.3983924388885498, + "rewards_train/rejected": -2.6002297401428223, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -8.294498443603516, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -21.210113525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42007485032081604, + "rewards_train/margins": 0.9853114783763885, + "rewards_train/rejected": -1.4053863286972046, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -28.184955596923828, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -51.989112854003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0934956073760986, + "rewards_train/margins": 2.763228178024292, + "rewards_train/rejected": -4.856723785400391, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -170.11859130859375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -169.33811950683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.51185941696167, + "rewards_train/margins": 0.32195281982421875, + "rewards_train/rejected": -4.833812236785889, + "step": 1823 + }, + { + "epoch": 0.51, + "logps_train/chosen": -114.48217010498047, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -143.21922302246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1982170343399048, + "rewards_train/margins": 4.623705267906189, + "rewards_train/rejected": -5.821922302246094, + "step": 1823 + }, + { + "epoch": 0.51, + "learning_rate": 3.7385446998013114e-07, + "loss": 0.3955, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -71.87211608886719, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -105.65637969970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7122116088867188, + "rewards_train/margins": 1.9534263610839844, + "rewards_train/rejected": -3.665637969970703, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -21.68146514892578, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -59.691307067871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7493965029716492, + "rewards_train/margins": 3.7572343945503235, + "rewards_train/rejected": -4.506630897521973, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -17.843420028686523, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -26.613906860351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9468420147895813, + "rewards_train/margins": 1.4692363142967224, + "rewards_train/rejected": -2.4160783290863037, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -78.44374084472656, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -62.78904724121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4443740844726562, + "rewards_train/margins": -0.8154693245887756, + "rewards_train/rejected": -0.6289047598838806, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -10.981781959533691, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -45.575538635253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47005319595336914, + "rewards_train/margins": 3.0062506198883057, + "rewards_train/rejected": -3.476303815841675, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -48.659732818603516, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -120.81212615966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09097328037023544, + "rewards_train/margins": 4.390239335596561, + "rewards_train/rejected": -4.481212615966797, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -195.2171630859375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -213.55947875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.021716594696045, + "rewards_train/margins": 0.9342312812805176, + "rewards_train/rejected": -7.9559478759765625, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -121.13818359375, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -238.0264892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1138184070587158, + "rewards_train/margins": 5.788830518722534, + "rewards_train/rejected": -6.90264892578125, + "step": 1824 + }, + { + "epoch": 0.51, + "logps_train/chosen": -12.646563529968262, + "logps_train/ref_chosen": -0.703125, + "logps_train/ref_rejected": -2.65625, + "logps_train/rejected": -18.89301872253418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1943439245224, + "rewards_train/margins": 0.429332971572876, + "rewards_train/rejected": -1.6236768960952759, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.650068283081055, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -3.0, + "logps_train/rejected": -61.48332214355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2025068998336792, + "rewards_train/margins": 4.645825505256653, + "rewards_train/rejected": -5.848332405090332, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -30.066097259521484, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -48.06171798706055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0691097974777222, + "rewards_train/margins": 3.218311905860901, + "rewards_train/rejected": -4.287421703338623, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -19.193634033203125, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -43.47260665893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1256134510040283, + "rewards_train/margins": 1.4466471672058105, + "rewards_train/rejected": -2.572260618209839, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.071134567260742, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -11.643815040588379, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.283676028251648, + "rewards_train/margins": -0.8849195241928101, + "rewards_train/rejected": -0.3987565040588379, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -125.11695861816406, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -125.95902252197266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8116958141326904, + "rewards_train/margins": -0.36579346656799316, + "rewards_train/rejected": -2.4459023475646973, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -7.044517517089844, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -31.31831932067871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19554825127124786, + "rewards_train/margins": 1.16488017141819, + "rewards_train/rejected": -0.9693319201469421, + "step": 1825 + }, + { + "epoch": 0.51, + "logps_train/chosen": -130.65283203125, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -138.98985290527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7152832746505737, + "rewards_train/margins": 3.6837021112442017, + "rewards_train/rejected": -5.398985385894775, + "step": 1825 + }, + { + "epoch": 0.51, + "learning_rate": 3.717939104656626e-07, + "loss": 0.3201, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -12.678755760192871, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -13.819737434387207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7616255879402161, + "rewards_train/margins": 0.27972322702407837, + "rewards_train/rejected": -1.0413488149642944, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -4.746926307678223, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -32.3828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030942631885409355, + "rewards_train/margins": 2.5792136657983065, + "rewards_train/rejected": -2.610156297683716, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -32.865318298339844, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -65.46903991699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.474031925201416, + "rewards_train/margins": 0.4728720188140869, + "rewards_train/rejected": -2.946903944015503, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -23.490894317626953, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -81.61143493652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5584644079208374, + "rewards_train/margins": 1.927679181098938, + "rewards_train/rejected": -3.4861435890197754, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -86.7987289428711, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -122.2056655883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7798728942871094, + "rewards_train/margins": 2.9906938076019287, + "rewards_train/rejected": -3.770566701889038, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -82.76156616210938, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -24.869741439819336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7761566638946533, + "rewards_train/margins": -0.03293251991271973, + "rewards_train/rejected": -1.7432241439819336, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -198.81536865234375, + "logps_train/ref_chosen": -204.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -147.89312744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.518463134765625, + "rewards_train/margins": 3.8077759742736816, + "rewards_train/rejected": -3.2893128395080566, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -19.65899658203125, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -48.34410095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.390899658203125, + "rewards_train/margins": 1.3310104608535767, + "rewards_train/rejected": -1.7219101190567017, + "step": 1826 + }, + { + "epoch": 0.51, + "logps_train/chosen": -145.558349609375, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -186.45433044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8558349609375, + "rewards_train/margins": 3.989598274230957, + "rewards_train/rejected": -4.845433235168457, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -0.7649487257003784, + "logps_train/ref_chosen": -0.353515625, + "logps_train/ref_rejected": -0.353515625, + "logps_train/rejected": -0.7476757764816284, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04114330932497978, + "rewards_train/margins": -0.0017272941768169403, + "rewards_train/rejected": -0.03941601514816284, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -136.82728576660156, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -175.61764526367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4327285289764404, + "rewards_train/margins": 0.6290361881256104, + "rewards_train/rejected": -4.061764717102051, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -10.239188194274902, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -5.28125, + "logps_train/rejected": -24.00093650817871, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32079383730888367, + "rewards_train/margins": 1.5511747896671295, + "rewards_train/rejected": -1.8719686269760132, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -7.106492042541504, + "logps_train/ref_chosen": -1.09375, + "logps_train/ref_rejected": -1.3046875, + "logps_train/rejected": -1.7414345741271973, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6012741923332214, + "rewards_train/margins": -0.5575994849205017, + "rewards_train/rejected": -0.04367470741271973, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.472457885742188, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -18.921144485473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3659957945346832, + "rewards_train/margins": 0.4386186897754669, + "rewards_train/rejected": -0.8046144843101501, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -12.415539741516113, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -11.27608585357666, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.36655399203300476, + "rewards_train/margins": -0.029570400714874268, + "rewards_train/rejected": -0.3369835913181305, + "step": 1827 + }, + { + "epoch": 0.51, + "logps_train/chosen": -103.49494934082031, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -189.0084228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6994950771331787, + "rewards_train/margins": 3.9513471126556396, + "rewards_train/rejected": -6.650842189788818, + "step": 1827 + }, + { + "epoch": 0.51, + "learning_rate": 3.6973774772777135e-07, + "loss": 0.365, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -73.31217956542969, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -98.31433868408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1562180519104004, + "rewards_train/margins": 2.125216007232666, + "rewards_train/rejected": -4.281434059143066, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -34.95883560180664, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -22.161285400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9708835482597351, + "rewards_train/margins": 0.2702450156211853, + "rewards_train/rejected": -1.2411285638809204, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -11.548751831054688, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -26.479787826538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6986252069473267, + "rewards_train/margins": 1.5181037187576294, + "rewards_train/rejected": -2.216728925704956, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -1.2518465518951416, + "logps_train/ref_chosen": -0.3515625, + "logps_train/ref_rejected": -0.3515625, + "logps_train/rejected": -1.2502398490905762, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09002840518951416, + "rewards_train/margins": -0.00016067177057266235, + "rewards_train/rejected": -0.0898677334189415, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -18.469209671020508, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -1.9296875, + "logps_train/rejected": -2.689990282058716, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4719209671020508, + "rewards_train/margins": -0.3958906903862953, + "rewards_train/rejected": -0.07603027671575546, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -15.0150146484375, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -24.56004524230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.00775146484375, + "rewards_train/margins": 1.071690559387207, + "rewards_train/rejected": -2.079442024230957, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -166.56890869140625, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -177.3011016845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.956890821456909, + "rewards_train/margins": 6.723219156265259, + "rewards_train/rejected": -9.680109977722168, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -17.491453170776367, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -22.825843811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3633546829223633, + "rewards_train/margins": 1.1084390878677368, + "rewards_train/rejected": -0.7450844049453735, + "step": 1828 + }, + { + "epoch": 0.51, + "logps_train/chosen": -54.246604919433594, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -59.992271423339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.050339508801698685, + "rewards_train/margins": 2.3495667465031147, + "rewards_train/rejected": -2.299227237701416, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -128.68658447265625, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -150.06195068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3686585426330566, + "rewards_train/margins": 3.1375365257263184, + "rewards_train/rejected": -5.506195068359375, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -46.24120330810547, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -57.31452941894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.117870330810547, + "rewards_train/margins": 1.6448326110839844, + "rewards_train/rejected": -4.762702941894531, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -19.64768409729004, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -42.42301559448242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8085184097290039, + "rewards_train/margins": 2.6775331497192383, + "rewards_train/rejected": -3.486051559448242, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -18.43168830871582, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -49.34597396850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19316883385181427, + "rewards_train/margins": 2.691428706049919, + "rewards_train/rejected": -2.8845975399017334, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -26.698606491088867, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -62.99742889404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4948606491088867, + "rewards_train/margins": 2.0173823833465576, + "rewards_train/rejected": -3.5122430324554443, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -279.78643798828125, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -262.187744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.978644371032715, + "rewards_train/margins": 0.040130615234375, + "rewards_train/rejected": -12.01877498626709, + "step": 1829 + }, + { + "epoch": 0.51, + "logps_train/chosen": -81.92768859863281, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -205.65582275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4927688539028168, + "rewards_train/margins": 6.772813707590103, + "rewards_train/rejected": -7.26558256149292, + "step": 1829 + }, + { + "epoch": 0.51, + "learning_rate": 3.676859961574161e-07, + "loss": 0.269, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -176.4032745361328, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -178.7015380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.140327453613281, + "rewards_train/margins": 0.32982635498046875, + "rewards_train/rejected": -4.47015380859375, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -19.869773864746094, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -15.01640510559082, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4932273626327515, + "rewards_train/margins": -0.30877435207366943, + "rewards_train/rejected": -1.184453010559082, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -20.412891387939453, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -31.930452346801758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6537891626358032, + "rewards_train/margins": -0.39824390411376953, + "rewards_train/rejected": -1.2555452585220337, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -170.07310485839844, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -238.568603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.507310390472412, + "rewards_train/margins": 6.349549770355225, + "rewards_train/rejected": -11.856860160827637, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -143.13758850097656, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -203.90826416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.213758945465088, + "rewards_train/margins": 4.177067279815674, + "rewards_train/rejected": -10.390826225280762, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.866376876831055, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -122.03866577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9085127115249634, + "rewards_train/margins": 0.4953538179397583, + "rewards_train/rejected": -2.4038665294647217, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -7.569441795349121, + "logps_train/ref_chosen": -1.109375, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -2.1831917762756348, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.64600670337677, + "rewards_train/margins": -0.7901875227689743, + "rewards_train/rejected": 0.14418081939220428, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -103.8971176147461, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -58.65205383300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.339711904525757, + "rewards_train/margins": 1.4879934787750244, + "rewards_train/rejected": -3.8277053833007812, + "step": 1830 + }, + { + "epoch": 0.51, + "logps_train/chosen": -196.32339477539062, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -210.18707275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.432339668273926, + "rewards_train/margins": 1.3863677978515625, + "rewards_train/rejected": -6.818707466125488, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -50.84687042236328, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -52.54042053222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4346871376037598, + "rewards_train/margins": -1.5806450843811035, + "rewards_train/rejected": -0.8540420532226562, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -69.7545166015625, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -49.07707977294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02545166015625, + "rewards_train/margins": 2.9072563648223877, + "rewards_train/rejected": -2.9327080249786377, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -160.88397216796875, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -191.82159423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.138397216796875, + "rewards_train/margins": 4.893762588500977, + "rewards_train/rejected": -9.032159805297852, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -74.82283782958984, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -152.9688720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2322837859392166, + "rewards_train/margins": 0.7646034210920334, + "rewards_train/rejected": -0.99688720703125, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -33.0682373046875, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -22.508014678955078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.80682373046875, + "rewards_train/margins": -0.25602227449417114, + "rewards_train/rejected": -0.5508014559745789, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -14.217650413513184, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -26.380922317504883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7842650413513184, + "rewards_train/margins": 0.26632726192474365, + "rewards_train/rejected": -1.050592303276062, + "step": 1831 + }, + { + "epoch": 0.51, + "logps_train/chosen": -46.816925048828125, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -51.42496871948242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3941924571990967, + "rewards_train/margins": 1.7170546054840088, + "rewards_train/rejected": -4.1112470626831055, + "step": 1831 + }, + { + "epoch": 0.51, + "learning_rate": 3.656386701146826e-07, + "loss": 0.5103, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -188.3510284423828, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -155.39959716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.13510274887085, + "rewards_train/margins": -0.7951428890228271, + "rewards_train/rejected": -3.3399598598480225, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -122.58674621582031, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -171.64306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7586746215820312, + "rewards_train/margins": 1.205631971359253, + "rewards_train/rejected": -3.964306592941284, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -41.975887298583984, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -12.79794692993164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.42258882522583, + "rewards_train/margins": -1.5240440964698792, + "rewards_train/rejected": -0.8985447287559509, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -73.09813690185547, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -154.12989807128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40981370210647583, + "rewards_train/margins": 2.2531761527061462, + "rewards_train/rejected": -2.662989854812622, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -171.36203002929688, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -234.6294403076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.386203289031982, + "rewards_train/margins": 3.976741313934326, + "rewards_train/rejected": -10.362944602966309, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -8.186594009399414, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -7.710999965667725, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08115940541028976, + "rewards_train/margins": 0.0055655911564826965, + "rewards_train/rejected": -0.08672499656677246, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -9.546902656555176, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -9.545762062072754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1765652745962143, + "rewards_train/margins": -0.00011406838893890381, + "rewards_train/rejected": -0.1764512062072754, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -80.44609069824219, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -94.572998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05539093166589737, + "rewards_train/margins": 3.237690784037113, + "rewards_train/rejected": -3.182299852371216, + "step": 1832 + }, + { + "epoch": 0.51, + "logps_train/chosen": -49.383182525634766, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -65.32267761230469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.475818395614624, + "rewards_train/margins": -0.6435506343841553, + "rewards_train/rejected": -1.8322677612304688, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -174.24960327148438, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -146.37013244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0249603986740112, + "rewards_train/margins": 2.762052893638611, + "rewards_train/rejected": -3.787013292312622, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.814559936523438, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -13.026250839233398, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1752059906721115, + "rewards_train/margins": 0.10241909325122833, + "rewards_train/rejected": -0.27762508392333984, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -29.679676055908203, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -25.285865783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1554676294326782, + "rewards_train/margins": 0.1981189250946045, + "rewards_train/rejected": -1.3535865545272827, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -101.94094848632812, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -168.75755310058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9440948963165283, + "rewards_train/margins": 3.6316606998443604, + "rewards_train/rejected": -5.575755596160889, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -93.24258422851562, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -150.90975952148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.149258613586426, + "rewards_train/margins": 5.391717910766602, + "rewards_train/rejected": -9.540976524353027, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -0.1910741627216339, + "logps_train/ref_chosen": -0.21875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -20.044084548950195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0027675838209688663, + "rewards_train/margins": 1.3946761102415621, + "rewards_train/rejected": -1.3919085264205933, + "step": 1833 + }, + { + "epoch": 0.51, + "logps_train/chosen": -174.92808532714844, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -242.23739624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.342808723449707, + "rewards_train/margins": 4.3809309005737305, + "rewards_train/rejected": -9.723739624023438, + "step": 1833 + }, + { + "epoch": 0.51, + "learning_rate": 3.6359578392868216e-07, + "loss": 0.4578, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -63.2755012512207, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -113.98945617675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5775501132011414, + "rewards_train/margins": 2.571395456790924, + "rewards_train/rejected": -3.1489455699920654, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -11.278322219848633, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -7.50068473815918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10283222049474716, + "rewards_train/margins": 0.32692376524209976, + "rewards_train/rejected": -0.4297559857368469, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -191.80853271484375, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -202.2279052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.080853462219238, + "rewards_train/margins": 3.141937255859375, + "rewards_train/rejected": -9.222790718078613, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -184.09014892578125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -220.36257934570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.509015083312988, + "rewards_train/margins": 0.6272430419921875, + "rewards_train/rejected": -5.136258125305176, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -20.179784774780273, + "logps_train/ref_chosen": -0.6015625, + "logps_train/ref_rejected": -2.21875, + "logps_train/rejected": -20.72088623046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9578222036361694, + "rewards_train/margins": -0.10760855674743652, + "rewards_train/rejected": -1.850213646888733, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -23.216854095458984, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -28.595094680786133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9466854333877563, + "rewards_train/margins": 0.6065740585327148, + "rewards_train/rejected": -1.5532594919204712, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -14.887588500976562, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -3.578125, + "logps_train/rejected": -14.759739875793457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.076258897781372, + "rewards_train/margins": 0.04190266132354736, + "rewards_train/rejected": -1.1181615591049194, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -283.51898193359375, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -275.93084716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.251898765563965, + "rewards_train/margins": 0.44118595123291016, + "rewards_train/rejected": -9.693084716796875, + "step": 1834 + }, + { + "epoch": 0.51, + "logps_train/chosen": -175.78717041015625, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -203.49334716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.428717613220215, + "rewards_train/margins": -0.8293828964233398, + "rewards_train/rejected": -7.599334716796875, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -145.3529052734375, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -133.8388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9352905750274658, + "rewards_train/margins": 1.6485962867736816, + "rewards_train/rejected": -3.5838868618011475, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.263031005859375, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -35.291168212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0861968994140625, + "rewards_train/margins": 1.1528137922286987, + "rewards_train/rejected": -1.0666168928146362, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -4.474931240081787, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -17.1533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25139936804771423, + "rewards_train/margins": 1.0842452347278595, + "rewards_train/rejected": -1.3356446027755737, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -26.43693733215332, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -29.565479278564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3561937808990479, + "rewards_train/margins": -0.19964587688446045, + "rewards_train/rejected": -1.1565479040145874, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -14.290443420410156, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -15.270040512084961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1009193658828735, + "rewards_train/margins": 0.007334709167480469, + "rewards_train/rejected": -1.108254075050354, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -128.97792053222656, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -94.01773071289062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.847792148590088, + "rewards_train/margins": -1.1460189819335938, + "rewards_train/rejected": -2.701773166656494, + "step": 1835 + }, + { + "epoch": 0.51, + "logps_train/chosen": -157.44415283203125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -197.93597412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.944415330886841, + "rewards_train/margins": 4.4491822719573975, + "rewards_train/rejected": -7.393597602844238, + "step": 1835 + }, + { + "epoch": 0.51, + "learning_rate": 3.615573518974525e-07, + "loss": 0.5183, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -164.692626953125, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -243.2801513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3692626953125, + "rewards_train/margins": 3.7587523460388184, + "rewards_train/rejected": -6.128015041351318, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -5.8260273933410645, + "logps_train/ref_chosen": -1.921875, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -13.983661651611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3904152512550354, + "rewards_train/margins": 0.6267009377479553, + "rewards_train/rejected": -1.0171161890029907, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -195.27984619140625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -216.9199676513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.427984714508057, + "rewards_train/margins": 0.46401214599609375, + "rewards_train/rejected": -6.89199686050415, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -29.745140075683594, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -63.61469650268555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.555764079093933, + "rewards_train/margins": 2.69320547580719, + "rewards_train/rejected": -4.248969554901123, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -239.91567993164062, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -305.645263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.191568374633789, + "rewards_train/margins": 7.272958755493164, + "rewards_train/rejected": -16.464527130126953, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.005563735961914, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -34.39408493041992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7818064093589783, + "rewards_train/margins": 1.4701021313667297, + "rewards_train/rejected": -2.251908540725708, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -5.7443037033081055, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -42.655494689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06619463115930557, + "rewards_train/margins": 3.8192442432045937, + "rewards_train/rejected": -3.753049612045288, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.15011215209961, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -26.859249114990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.952511191368103, + "rewards_train/margins": 0.45216381549835205, + "rewards_train/rejected": -2.404675006866455, + "step": 1836 + }, + { + "epoch": 0.51, + "logps_train/chosen": -41.76471710205078, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -27.978639602661133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.488971710205078, + "rewards_train/margins": -0.8473577499389648, + "rewards_train/rejected": -1.6416139602661133, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -1.2099926471710205, + "logps_train/ref_chosen": -0.310546875, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -37.346492767333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08994457870721817, + "rewards_train/margins": 2.9884548410773277, + "rewards_train/rejected": -3.078399419784546, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -32.7623291015625, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -20.57192039489746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5012329816818237, + "rewards_train/margins": -0.10341596603393555, + "rewards_train/rejected": -1.3978170156478882, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -23.14122200012207, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -50.220027923583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9328722357749939, + "rewards_train/margins": 3.1891308426856995, + "rewards_train/rejected": -4.122003078460693, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -1.0993244647979736, + "logps_train/ref_chosen": -1.75, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -8.43635368347168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06506755203008652, + "rewards_train/margins": 0.052452920004725456, + "rewards_train/rejected": 0.012614632025361061, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -88.4939193725586, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -128.85060119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.074392080307007, + "rewards_train/margins": 2.0106680393218994, + "rewards_train/rejected": -5.085060119628906, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -25.84224510192871, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -46.62674331665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0904744863510132, + "rewards_train/margins": 1.3596998453140259, + "rewards_train/rejected": -2.450174331665039, + "step": 1837 + }, + { + "epoch": 0.51, + "logps_train/chosen": -27.175432205200195, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -41.489646911621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8550432324409485, + "rewards_train/margins": 1.6189214587211609, + "rewards_train/rejected": -2.4739646911621094, + "step": 1837 + }, + { + "epoch": 0.51, + "learning_rate": 3.595233882878569e-07, + "loss": 0.3106, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -114.53718566894531, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -158.170654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.003718614578247, + "rewards_train/margins": 0.4633469581604004, + "rewards_train/rejected": -3.4670655727386475, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -30.40857696533203, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -38.177520751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.453357696533203, + "rewards_train/margins": 0.923769474029541, + "rewards_train/rejected": -3.377127170562744, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -148.3592987060547, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -191.08282470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2359299659729004, + "rewards_train/margins": 4.4223527908325195, + "rewards_train/rejected": -7.65828275680542, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -26.86869239807129, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -25.128498077392578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8493692874908447, + "rewards_train/margins": -1.211519479751587, + "rewards_train/rejected": -0.6378498077392578, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -28.112688064575195, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -46.10316467285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2050187587738037, + "rewards_train/margins": 0.4302978515625, + "rewards_train/rejected": -2.6353166103363037, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -74.61382293701172, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -126.53527069091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7386177182197571, + "rewards_train/margins": 2.0421448349952698, + "rewards_train/rejected": -1.3035271167755127, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -88.28326416015625, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -103.75647735595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.628326416015625, + "rewards_train/margins": 2.8973214626312256, + "rewards_train/rejected": -3.5256478786468506, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -11.723795890808105, + "logps_train/ref_chosen": -3.203125, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -23.709617614746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8520671129226685, + "rewards_train/margins": 0.8626446723937988, + "rewards_train/rejected": -1.7147117853164673, + "step": 1838 + }, + { + "epoch": 0.51, + "logps_train/chosen": -129.4638671875, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -148.47837829589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.54638671875, + "rewards_train/margins": 3.1514511108398438, + "rewards_train/rejected": -5.697837829589844, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -62.24463653564453, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -101.0450668334961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.3744635581970215, + "rewards_train/margins": -2.469956874847412, + "rewards_train/rejected": -1.9045066833496094, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -155.75643920898438, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -153.82691955566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.775644063949585, + "rewards_train/margins": -0.5429520606994629, + "rewards_train/rejected": -3.232692003250122, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -111.4194107055664, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -223.20932006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9419411420822144, + "rewards_train/margins": 7.078991055488586, + "rewards_train/rejected": -9.0209321975708, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -4.144834518432617, + "logps_train/ref_chosen": -0.55078125, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -33.122989654541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3594053387641907, + "rewards_train/margins": 2.4997686743736267, + "rewards_train/rejected": -2.8591740131378174, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -53.72025680541992, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -43.00006103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.259525775909424, + "rewards_train/margins": 1.227980375289917, + "rewards_train/rejected": -3.487506151199341, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -86.83541870117188, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -175.524169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6835418939590454, + "rewards_train/margins": 5.568875193595886, + "rewards_train/rejected": -7.252417087554932, + "step": 1839 + }, + { + "epoch": 0.51, + "logps_train/chosen": -14.279484748840332, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -25.399181365966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.45955154299736023, + "rewards_train/margins": 2.42446967959404, + "rewards_train/rejected": -1.9649181365966797, + "step": 1839 + }, + { + "epoch": 0.51, + "learning_rate": 3.574939073354838e-07, + "loss": 0.4597, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.59741973876953, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -25.390445709228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7972419857978821, + "rewards_train/margins": 0.6043025851249695, + "rewards_train/rejected": -1.4015445709228516, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -147.16787719726562, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -167.15325927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6167876720428467, + "rewards_train/margins": 3.7485382556915283, + "rewards_train/rejected": -6.365325927734375, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -22.657926559448242, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -37.46449661254883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5157926678657532, + "rewards_train/margins": 2.7931570410728455, + "rewards_train/rejected": -3.3089497089385986, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -19.27805519104004, + "logps_train/ref_chosen": -21.375, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -19.91958236694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2096944898366928, + "rewards_train/margins": 0.5141527205705643, + "rewards_train/rejected": -0.30445823073387146, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -0.12628662586212158, + "logps_train/ref_chosen": -0.349609375, + "logps_train/ref_rejected": -0.349609375, + "logps_train/rejected": -0.12376458942890167, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.02233227528631687, + "rewards_train/margins": -0.0002522040158510208, + "rewards_train/rejected": 0.022584479302167892, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -193.24267578125, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -196.32212829589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.424267768859863, + "rewards_train/margins": -1.092054843902588, + "rewards_train/rejected": -5.332212924957275, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -15.765129089355469, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -44.312435150146484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3944816589355469, + "rewards_train/margins": 2.193011999130249, + "rewards_train/rejected": -3.587493658065796, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -25.715288162231445, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -20.251142501831055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9902788400650024, + "rewards_train/margins": -0.8714145421981812, + "rewards_train/rejected": -1.1188642978668213, + "step": 1840 + }, + { + "epoch": 0.51, + "logps_train/chosen": -92.91346740722656, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -124.06581115722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34134674072265625, + "rewards_train/margins": 0.7652343511581421, + "rewards_train/rejected": -1.1065810918807983, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -119.19009399414062, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -241.4385528564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.369009494781494, + "rewards_train/margins": 7.8748459815979, + "rewards_train/rejected": -10.243855476379395, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -16.99751853942871, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -21.708723068237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9997518658638, + "rewards_train/margins": 0.49924546480178833, + "rewards_train/rejected": -1.4989973306655884, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -165.76763916015625, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -173.88160705566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.876763939857483, + "rewards_train/margins": 5.411396861076355, + "rewards_train/rejected": -7.288160800933838, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -9.546789169311523, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -16.1029109954834, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.026571083813905716, + "rewards_train/margins": 0.6931121833622456, + "rewards_train/rejected": -0.6665410995483398, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -110.87980651855469, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -139.89996337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6379806399345398, + "rewards_train/margins": 3.852015793323517, + "rewards_train/rejected": -4.489996433258057, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -39.85310745239258, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -30.498374938964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.554060697555542, + "rewards_train/margins": 0.04265189170837402, + "rewards_train/rejected": -2.596712589263916, + "step": 1841 + }, + { + "epoch": 0.51, + "logps_train/chosen": -113.10153198242188, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -112.46527862548828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3601531982421875, + "rewards_train/margins": -0.06362533569335938, + "rewards_train/rejected": -1.2965278625488281, + "step": 1841 + }, + { + "epoch": 0.51, + "learning_rate": 3.554689232445477e-07, + "loss": 0.4422, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -11.579571723937988, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -0.6796875, + "logps_train/rejected": -18.956554412841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46108219027519226, + "rewards_train/margins": 1.3666044771671295, + "rewards_train/rejected": -1.8276866674423218, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -131.7596893310547, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -169.66419982910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.225968837738037, + "rewards_train/margins": 1.4904513359069824, + "rewards_train/rejected": -6.7164201736450195, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -158.51220703125, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -183.35610961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6012208461761475, + "rewards_train/margins": 3.5343902111053467, + "rewards_train/rejected": -7.135611057281494, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -31.16593360900879, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -13.924013137817383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8540934324264526, + "rewards_train/margins": -0.9835671186447144, + "rewards_train/rejected": -0.8705263137817383, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -8.321646690368652, + "logps_train/ref_chosen": -0.796875, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -14.960018157958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7524771690368652, + "rewards_train/margins": 0.5888371467590332, + "rewards_train/rejected": -1.3413143157958984, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -87.94984436035156, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -76.87289428710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.144984483718872, + "rewards_train/margins": 0.29230499267578125, + "rewards_train/rejected": -2.4372894763946533, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -24.784465789794922, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -52.66352081298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9815715551376343, + "rewards_train/margins": 0.809780478477478, + "rewards_train/rejected": -2.7913520336151123, + "step": 1842 + }, + { + "epoch": 0.51, + "logps_train/chosen": -29.966529846191406, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -32.62922668457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7685279846191406, + "rewards_train/margins": 0.10689473152160645, + "rewards_train/rejected": -2.875422716140747, + "step": 1842 + }, + { + "epoch": 0.52, + "logps_train/chosen": -13.396696090698242, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -22.730690002441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8209196329116821, + "rewards_train/margins": 0.4146493673324585, + "rewards_train/rejected": -1.2355690002441406, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -131.3201141357422, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -166.03768920898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8820114135742188, + "rewards_train/margins": 1.5217576026916504, + "rewards_train/rejected": -3.403769016265869, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -174.6060791015625, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -174.79307556152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.760607957839966, + "rewards_train/margins": 2.9186995029449463, + "rewards_train/rejected": -6.679307460784912, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -9.601556777954102, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -13.443153381347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.714843213558197, + "rewards_train/margins": 0.2357221245765686, + "rewards_train/rejected": -0.9505653381347656, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -150.0758056640625, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -190.25375366210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.95758056640625, + "rewards_train/margins": -0.5322051048278809, + "rewards_train/rejected": -5.425375461578369, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -135.40943908691406, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -232.89710998535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.440943956375122, + "rewards_train/margins": 6.948767423629761, + "rewards_train/rejected": -8.389711380004883, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -191.22842407226562, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -209.87159729003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.372842311859131, + "rewards_train/margins": 0.714317798614502, + "rewards_train/rejected": -8.087160110473633, + "step": 1843 + }, + { + "epoch": 0.52, + "logps_train/chosen": -93.43130493164062, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -147.21749877929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3931305408477783, + "rewards_train/margins": 3.0786192417144775, + "rewards_train/rejected": -4.471749782562256, + "step": 1843 + }, + { + "epoch": 0.52, + "learning_rate": 3.5344845018779114e-07, + "loss": 0.4091, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -39.212310791015625, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -178.1173095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.396231174468994, + "rewards_train/margins": 5.565499782562256, + "rewards_train/rejected": -7.96173095703125, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -19.644176483154297, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -25.00263214111328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0894176959991455, + "rewards_train/margins": -0.13915449380874634, + "rewards_train/rejected": -0.9502632021903992, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -40.91987991333008, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -70.8456802368164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.90448796749115, + "rewards_train/margins": 2.0175801515579224, + "rewards_train/rejected": -3.9220681190490723, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -152.16827392578125, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -174.53564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1168274879455566, + "rewards_train/margins": 2.436737060546875, + "rewards_train/rejected": -4.553564548492432, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -12.384377479553223, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -17.313295364379883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25718775391578674, + "rewards_train/margins": -0.25085821747779846, + "rewards_train/rejected": -0.006329536437988281, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -29.66993522644043, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -27.436065673828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.188868522644043, + "rewards_train/margins": -0.11088681221008301, + "rewards_train/rejected": -2.07798171043396, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -53.40859603881836, + "logps_train/ref_chosen": -27.25, + "logps_train/ref_rejected": -26.75, + "logps_train/rejected": -50.555389404296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6158597469329834, + "rewards_train/margins": -0.2353208065032959, + "rewards_train/rejected": -2.3805389404296875, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -105.21951293945312, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -172.85552978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7219513654708862, + "rewards_train/margins": 5.46360170841217, + "rewards_train/rejected": -7.185553073883057, + "step": 1844 + }, + { + "epoch": 0.52, + "logps_train/chosen": -137.33335876464844, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -171.69393920898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.433336019515991, + "rewards_train/margins": 0.9360580444335938, + "rewards_train/rejected": -3.369394063949585, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -169.25665283203125, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -116.14323425292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2256653308868408, + "rewards_train/margins": 2.7386581897735596, + "rewards_train/rejected": -3.9643235206604004, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -25.32964515686035, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -37.93717956542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2517145872116089, + "rewards_train/margins": 1.7170034646987915, + "rewards_train/rejected": -2.9687180519104004, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -133.4517822265625, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -185.59405517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3951783180236816, + "rewards_train/margins": 1.4642271995544434, + "rewards_train/rejected": -3.859405517578125, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -33.614532470703125, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -70.01116180419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7114533185958862, + "rewards_train/margins": 1.1646629571914673, + "rewards_train/rejected": -2.8761162757873535, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -44.21704864501953, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -40.66523361206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.459204912185669, + "rewards_train/margins": 0.7573184967041016, + "rewards_train/rejected": -3.2165234088897705, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -129.0052490234375, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -195.5475616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0005249977111816, + "rewards_train/margins": 3.9542312622070312, + "rewards_train/rejected": -5.954756259918213, + "step": 1845 + }, + { + "epoch": 0.52, + "logps_train/chosen": -9.175606727600098, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -18.068586349487305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21131066977977753, + "rewards_train/margins": 0.9236729890108109, + "rewards_train/rejected": -1.1349836587905884, + "step": 1845 + }, + { + "epoch": 0.52, + "learning_rate": 3.5143250230638245e-07, + "loss": 0.322, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -90.86602020263672, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -188.18312072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8366020917892456, + "rewards_train/margins": 5.331709980964661, + "rewards_train/rejected": -7.168312072753906, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -31.076231002807617, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -25.484010696411133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5076231360435486, + "rewards_train/margins": 1.4220280051231384, + "rewards_train/rejected": -1.929651141166687, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -86.17891693115234, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -98.64888763427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6678916811943054, + "rewards_train/margins": 1.7469971776008606, + "rewards_train/rejected": -2.414888858795166, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -96.14566802978516, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -65.09291076660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4645668268203735, + "rewards_train/margins": 3.007224440574646, + "rewards_train/rejected": -4.4717912673950195, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -10.074111938476562, + "logps_train/ref_chosen": -0.89453125, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -32.951133728027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9179580807685852, + "rewards_train/margins": 0.152155339717865, + "rewards_train/rejected": -1.0701134204864502, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -198.12559509277344, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -227.54776000976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8125594854354858, + "rewards_train/margins": 3.3422166109085083, + "rewards_train/rejected": -5.154776096343994, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -39.61848068237305, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -53.19329833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8805980682373047, + "rewards_train/margins": 1.785606861114502, + "rewards_train/rejected": -4.666204929351807, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -140.6160888671875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -231.7269744873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.96160888671875, + "rewards_train/margins": 6.811088562011719, + "rewards_train/rejected": -7.772697448730469, + "step": 1846 + }, + { + "epoch": 0.52, + "logps_train/chosen": -191.5547637939453, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -253.8206329345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.9554762840271, + "rewards_train/margins": 6.326587200164795, + "rewards_train/rejected": -11.282063484191895, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -30.95290184020996, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -42.04521560668945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.189040184020996, + "rewards_train/margins": 0.8092315196990967, + "rewards_train/rejected": -2.9982717037200928, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -187.041015625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -211.0, + "logps_train/rejected": -317.3581237792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.804101467132568, + "rewards_train/margins": 4.831711292266846, + "rewards_train/rejected": -10.635812759399414, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -17.686737060546875, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -43.18354034423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11882629245519638, + "rewards_train/margins": 3.2246803268790245, + "rewards_train/rejected": -3.105854034423828, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -102.15676879882812, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -101.43553161621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0656769275665283, + "rewards_train/margins": -0.07212376594543457, + "rewards_train/rejected": -0.9935531616210938, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -7.1683807373046875, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -17.167221069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4081619381904602, + "rewards_train/margins": 1.774884045124054, + "rewards_train/rejected": -1.3667221069335938, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -16.61902618408203, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -28.058879852294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5869026184082031, + "rewards_train/margins": 1.6346104145050049, + "rewards_train/rejected": -2.221513032913208, + "step": 1847 + }, + { + "epoch": 0.52, + "logps_train/chosen": -3.0605416297912598, + "logps_train/ref_chosen": -1.1484375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -36.5960693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19121041893959045, + "rewards_train/margins": 2.368396610021591, + "rewards_train/rejected": -2.5596070289611816, + "step": 1847 + }, + { + "epoch": 0.52, + "learning_rate": 3.494210937098202e-07, + "loss": 0.1759, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -107.5928955078125, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -107.22135925292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3092895448207855, + "rewards_train/margins": 2.162846475839615, + "rewards_train/rejected": -2.4721360206604004, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -120.09140014648438, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -201.86837768554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.909140110015869, + "rewards_train/margins": 3.4776978492736816, + "rewards_train/rejected": -7.386837959289551, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -11.25266170501709, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -22.900775909423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.81901615858078, + "rewards_train/margins": 0.8804364800453186, + "rewards_train/rejected": -1.6994526386260986, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -124.71867370605469, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -148.34231567382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9218673706054688, + "rewards_train/margins": 2.8623642921447754, + "rewards_train/rejected": -3.784231662750244, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -78.28469848632812, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -54.5, + "logps_train/rejected": -54.567108154296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5284698605537415, + "rewards_train/margins": -0.5217590448446572, + "rewards_train/rejected": -0.006710815709084272, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -30.19330596923828, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -44.3303108215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.706830620765686, + "rewards_train/margins": 1.6012004613876343, + "rewards_train/rejected": -2.3080310821533203, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -60.83712387084961, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -96.56376647949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.808712363243103, + "rewards_train/margins": 2.3226643800735474, + "rewards_train/rejected": -4.13137674331665, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -116.80452728271484, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -186.60736083984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.380452871322632, + "rewards_train/margins": 3.480283498764038, + "rewards_train/rejected": -5.86073637008667, + "step": 1848 + }, + { + "epoch": 0.52, + "logps_train/chosen": -122.78226470947266, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -123.42707824707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.028226375579834, + "rewards_train/margins": 0.06448173522949219, + "rewards_train/rejected": -4.092708110809326, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -24.120182037353516, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -44.673797607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5807682275772095, + "rewards_train/margins": 2.080361485481262, + "rewards_train/rejected": -3.6611297130584717, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -14.441596984863281, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -13.644139289855957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08084030449390411, + "rewards_train/margins": 0.6015042215585709, + "rewards_train/rejected": -0.5206639170646667, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -5.6510090827941895, + "logps_train/ref_chosen": -1.296875, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -24.08413314819336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4354134202003479, + "rewards_train/margins": 0.404249906539917, + "rewards_train/rejected": -0.8396633267402649, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -126.32865905761719, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -134.2962646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3828659057617188, + "rewards_train/margins": 0.19676053524017334, + "rewards_train/rejected": -1.579626441001892, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -110.11109924316406, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -121.33797454833984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1611099243164062, + "rewards_train/margins": 1.4726874828338623, + "rewards_train/rejected": -3.6337974071502686, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -95.44352722167969, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -95.16012573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3556472957134247, + "rewards_train/margins": 1.9716598689556122, + "rewards_train/rejected": -1.6160125732421875, + "step": 1849 + }, + { + "epoch": 0.52, + "logps_train/chosen": -147.68675231933594, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -205.66397094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.168675422668457, + "rewards_train/margins": 4.647722244262695, + "rewards_train/rejected": -8.816397666931152, + "step": 1849 + }, + { + "epoch": 0.52, + "learning_rate": 3.4741423847583127e-07, + "loss": 0.2818, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -114.0606918334961, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -114.60020446777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.456069469451904, + "rewards_train/margins": 0.053951263427734375, + "rewards_train/rejected": -5.510020732879639, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -160.55740356445312, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -248.71096801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.605740547180176, + "rewards_train/margins": 8.815356254577637, + "rewards_train/rejected": -13.421096801757812, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -79.44320678710938, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -101.13774871826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1943206787109375, + "rewards_train/margins": 0.3194541931152344, + "rewards_train/rejected": -0.5137748718261719, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -26.450103759765625, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -44.835044860839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1825103759765625, + "rewards_train/margins": 1.7791192531585693, + "rewards_train/rejected": -3.961629629135132, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -6.966397285461426, + "logps_train/ref_chosen": -1.765625, + "logps_train/ref_rejected": -1.609375, + "logps_train/rejected": -7.217623710632324, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5200772285461426, + "rewards_train/margins": 0.040747642517089844, + "rewards_train/rejected": -0.5608248710632324, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -67.25056457519531, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -160.03529357910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.725056529045105, + "rewards_train/margins": 6.978472828865051, + "rewards_train/rejected": -8.703529357910156, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -143.40127563476562, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -155.88916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.9901275634765625, + "rewards_train/margins": 0.5487885475158691, + "rewards_train/rejected": -7.538916110992432, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -75.21873474121094, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -78.16496276855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9718734622001648, + "rewards_train/margins": 2.2196229100227356, + "rewards_train/rejected": -3.1914963722229004, + "step": 1850 + }, + { + "epoch": 0.52, + "logps_train/chosen": -26.459455490112305, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -54.28701400756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1334455013275146, + "rewards_train/margins": 2.604631185531616, + "rewards_train/rejected": -4.738076686859131, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -43.03118896484375, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -32.709381103515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.540618896484375, + "rewards_train/margins": -0.5759307146072388, + "rewards_train/rejected": -1.9646881818771362, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -111.1290283203125, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -191.95556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6129028797149658, + "rewards_train/margins": 4.282654047012329, + "rewards_train/rejected": -5.895556926727295, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -72.42559051513672, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -117.87379455566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.317559003829956, + "rewards_train/margins": 3.4948203563690186, + "rewards_train/rejected": -6.812379360198975, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -28.62994384765625, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -79.80028533935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.062994360923767, + "rewards_train/margins": 1.7420343160629272, + "rewards_train/rejected": -2.8050286769866943, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -65.63577270507812, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -50.144004821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2135772705078125, + "rewards_train/margins": 1.0633232593536377, + "rewards_train/rejected": -3.27690052986145, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -21.395978927612305, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -33.29716873168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1395978927612305, + "rewards_train/margins": 1.5776190757751465, + "rewards_train/rejected": -2.717216968536377, + "step": 1851 + }, + { + "epoch": 0.52, + "logps_train/chosen": -3.6347508430480957, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -28.678531646728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04902491718530655, + "rewards_train/margins": 0.07937808148562908, + "rewards_train/rejected": -0.030353164300322533, + "step": 1851 + }, + { + "epoch": 0.52, + "learning_rate": 3.454119506502753e-07, + "loss": 0.3149, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -2.6521084308624268, + "logps_train/ref_chosen": -0.69140625, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -24.471099853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19607022404670715, + "rewards_train/margins": 1.7604148089885712, + "rewards_train/rejected": -1.9564850330352783, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -94.86253356933594, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -78.0527114868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3862533569335938, + "rewards_train/margins": 0.9440178871154785, + "rewards_train/rejected": -2.3302712440490723, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -35.561275482177734, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -38.77991485595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0842525959014893, + "rewards_train/margins": 0.1906139850616455, + "rewards_train/rejected": -3.2748665809631348, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -35.72776412963867, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -17.25666046142578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6352765560150146, + "rewards_train/margins": -1.3221105337142944, + "rewards_train/rejected": -1.3131660223007202, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -97.73324584960938, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -94.34577941894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22332458198070526, + "rewards_train/margins": 1.961253359913826, + "rewards_train/rejected": -2.1845779418945312, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -78.1460952758789, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -78.02557373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7396095991134644, + "rewards_train/margins": 0.48794782161712646, + "rewards_train/rejected": -2.227557420730591, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -30.275497436523438, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -27.91275405883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5462998151779175, + "rewards_train/margins": 0.49185073375701904, + "rewards_train/rejected": -2.0381505489349365, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -171.0343017578125, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -260.50140380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.603430271148682, + "rewards_train/margins": 8.64670991897583, + "rewards_train/rejected": -13.250140190124512, + "step": 1852 + }, + { + "epoch": 0.52, + "logps_train/chosen": -154.70455932617188, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -330.0, + "logps_train/rejected": -410.5441589355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.270455837249756, + "rewards_train/margins": 3.7839598655700684, + "rewards_train/rejected": -8.054415702819824, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -268.6255187988281, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -209.0, + "logps_train/rejected": -323.5213623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.062552452087402, + "rewards_train/margins": 0.3895835876464844, + "rewards_train/rejected": -11.452136039733887, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -15.038189888000488, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -17.078975677490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2756940126419067, + "rewards_train/margins": 0.12282860279083252, + "rewards_train/rejected": -1.3985226154327393, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -20.55548667907715, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -23.64019012451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8492986559867859, + "rewards_train/margins": 1.2412829995155334, + "rewards_train/rejected": -2.0905816555023193, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -9.93179702758789, + "logps_train/ref_chosen": -0.8359375, + "logps_train/ref_rejected": -3.734375, + "logps_train/rejected": -12.40296745300293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9095859527587891, + "rewards_train/margins": -0.04272669553756714, + "rewards_train/rejected": -0.8668592572212219, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -19.213218688964844, + "logps_train/ref_chosen": -2.453125, + "logps_train/ref_rejected": -0.314453125, + "logps_train/rejected": -20.125041961669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6760094165802002, + "rewards_train/margins": 0.3050495386123657, + "rewards_train/rejected": -1.981058955192566, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -82.02877807617188, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -174.46432495117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7778778076171875, + "rewards_train/margins": 1.2685546875, + "rewards_train/rejected": -5.0464324951171875, + "step": 1853 + }, + { + "epoch": 0.52, + "logps_train/chosen": -163.34555053710938, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -206.40994262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.034555196762085, + "rewards_train/margins": 5.306439638137817, + "rewards_train/rejected": -8.340994834899902, + "step": 1853 + }, + { + "epoch": 0.52, + "learning_rate": 3.434142442470437e-07, + "loss": 0.4181, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -191.01263427734375, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -198.88609313964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.551263332366943, + "rewards_train/margins": -0.06265401840209961, + "rewards_train/rejected": -6.488609313964844, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -19.2850341796875, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -18.797447204589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.222253441810608, + "rewards_train/margins": 0.3934288024902344, + "rewards_train/rejected": -1.6156822443008423, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -20.316041946411133, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -66.85363006591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.447229266166687, + "rewards_train/margins": 4.00688374042511, + "rewards_train/rejected": -5.454113006591797, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -14.789544105529785, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -26.052473068237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41645440459251404, + "rewards_train/margins": 0.6575429737567902, + "rewards_train/rejected": -1.0739973783493042, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -5.2078986167907715, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -19.584733963012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02921013906598091, + "rewards_train/margins": 1.0251835472881794, + "rewards_train/rejected": -0.9959734082221985, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -96.21355438232422, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -77.2102279663086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7213554382324219, + "rewards_train/margins": -0.0003325939178466797, + "rewards_train/rejected": -1.7210228443145752, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -144.81338500976562, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -118.26107788085938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9313385486602783, + "rewards_train/margins": -0.00523066520690918, + "rewards_train/rejected": -3.926107883453369, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -32.40781784057617, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -77.7601089477539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.978281855583191, + "rewards_train/margins": -0.3272709846496582, + "rewards_train/rejected": -1.6510108709335327, + "step": 1854 + }, + { + "epoch": 0.52, + "logps_train/chosen": -220.9065399169922, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -180.6985626220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.490653991699219, + "rewards_train/margins": 1.0792021751403809, + "rewards_train/rejected": -6.5698561668396, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -12.261186599731445, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -21.75043487548828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6479936838150024, + "rewards_train/margins": -0.09795016050338745, + "rewards_train/rejected": -0.550043523311615, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -225.30108642578125, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -228.66175842285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.430109024047852, + "rewards_train/margins": 2.2360668182373047, + "rewards_train/rejected": -12.666175842285156, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -183.47354125976562, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -217.41705322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.547354221343994, + "rewards_train/margins": 4.694351673126221, + "rewards_train/rejected": -12.241705894470215, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -43.3371467590332, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -25.38866424560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.996214747428894, + "rewards_train/margins": -1.3948482871055603, + "rewards_train/rejected": -0.6013664603233337, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -183.23532104492188, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -211.55819702148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.023532390594482, + "rewards_train/margins": 3.6322875022888184, + "rewards_train/rejected": -8.6558198928833, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -7.1733317375183105, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -43.91694641113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20170818269252777, + "rewards_train/margins": 1.939986601471901, + "rewards_train/rejected": -2.1416947841644287, + "step": 1855 + }, + { + "epoch": 0.52, + "logps_train/chosen": -124.28498840332031, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -227.8814239501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7284988164901733, + "rewards_train/margins": 5.7596436738967896, + "rewards_train/rejected": -7.488142490386963, + "step": 1855 + }, + { + "epoch": 0.52, + "learning_rate": 3.414211332479634e-07, + "loss": 0.4476, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -169.2618408203125, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -262.4843444824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.476184368133545, + "rewards_train/margins": 6.872250080108643, + "rewards_train/rejected": -11.348434448242188, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -27.38127899169922, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -44.826271057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6756279468536377, + "rewards_train/margins": 2.3351242542266846, + "rewards_train/rejected": -4.010752201080322, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -22.777629852294922, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -90.40607452392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7808879613876343, + "rewards_train/margins": 2.7097197771072388, + "rewards_train/rejected": -4.490607738494873, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -21.321996688842773, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -35.703758239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5071996450424194, + "rewards_train/margins": 0.6631761789321899, + "rewards_train/rejected": -2.1703758239746094, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -76.80140686035156, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -91.785888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8551406860351562, + "rewards_train/margins": 1.2234482765197754, + "rewards_train/rejected": -4.078588962554932, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -24.912578582763672, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -43.78007507324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9850078821182251, + "rewards_train/margins": 0.6429996490478516, + "rewards_train/rejected": -1.6280075311660767, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -3.756779432296753, + "logps_train/ref_chosen": -1.4453125, + "logps_train/ref_rejected": -1.8984375, + "logps_train/rejected": -5.547423839569092, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2311466932296753, + "rewards_train/margins": 0.13375195860862732, + "rewards_train/rejected": -0.3648986518383026, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -73.9185562133789, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -167.4928436279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3168556690216064, + "rewards_train/margins": 6.832428693771362, + "rewards_train/rejected": -8.149284362792969, + "step": 1856 + }, + { + "epoch": 0.52, + "logps_train/chosen": -171.59544372558594, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -226.890380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.859544515609741, + "rewards_train/margins": 5.7294933795928955, + "rewards_train/rejected": -9.589037895202637, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -158.24502563476562, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -199.669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.574502468109131, + "rewards_train/margins": 4.292489528656006, + "rewards_train/rejected": -8.866991996765137, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -53.611019134521484, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -130.10491943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7361019849777222, + "rewards_train/margins": 3.474390149116516, + "rewards_train/rejected": -5.210492134094238, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -123.85110473632812, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -218.92062377929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0351104736328125, + "rewards_train/margins": 5.85695219039917, + "rewards_train/rejected": -7.892062664031982, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -122.98255920410156, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -212.82705688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.048255920410156, + "rewards_train/margins": 6.534449577331543, + "rewards_train/rejected": -11.5827054977417, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -82.79381561279297, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -96.58716583251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.129381537437439, + "rewards_train/margins": 1.2293351888656616, + "rewards_train/rejected": -2.3587167263031006, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -161.88729858398438, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -224.35418701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.488729953765869, + "rewards_train/margins": 4.346688747406006, + "rewards_train/rejected": -6.835418701171875, + "step": 1857 + }, + { + "epoch": 0.52, + "logps_train/chosen": -198.03341674804688, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -218.83995056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5033416748046875, + "rewards_train/margins": 5.380653381347656, + "rewards_train/rejected": -5.883995056152344, + "step": 1857 + }, + { + "epoch": 0.52, + "learning_rate": 3.3943263160269764e-07, + "loss": 0.1381, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -111.38446044921875, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -161.28225708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.488446235656738, + "rewards_train/margins": 1.7897796630859375, + "rewards_train/rejected": -6.278225898742676, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -106.2899169921875, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -125.15949249267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12899170815944672, + "rewards_train/margins": 3.3369575887918472, + "rewards_train/rejected": -3.465949296951294, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -120.59854125976562, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -63.23936462402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9098541140556335, + "rewards_train/margins": 3.2765825390815735, + "rewards_train/rejected": -4.186436653137207, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -34.59748458862305, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -46.921539306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4347485303878784, + "rewards_train/margins": 0.482405424118042, + "rewards_train/rejected": -1.9171539545059204, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -0.18826688826084137, + "logps_train/ref_chosen": -0.5234375, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -4.181821346282959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03351706266403198, + "rewards_train/margins": 0.2001367062330246, + "rewards_train/rejected": -0.16661964356899261, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -94.27743530273438, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -123.23597717285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.427743524312973, + "rewards_train/margins": 3.6458543837070465, + "rewards_train/rejected": -4.0735979080200195, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -88.61398315429688, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -119.21661376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5113983154296875, + "rewards_train/margins": 2.4602630138397217, + "rewards_train/rejected": -2.971661329269409, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -1.8103342056274414, + "logps_train/ref_chosen": -0.328125, + "logps_train/ref_rejected": -0.328125, + "logps_train/rejected": -1.815797209739685, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14822092652320862, + "rewards_train/margins": 0.0005462914705276489, + "rewards_train/rejected": -0.14876721799373627, + "step": 1858 + }, + { + "epoch": 0.52, + "logps_train/chosen": -43.85624694824219, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -43.04184341430664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9356247186660767, + "rewards_train/margins": -0.2814403772354126, + "rewards_train/rejected": -1.654184341430664, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -255.26571655273438, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -251.23214721679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.4265718460083, + "rewards_train/margins": -0.3033571243286133, + "rewards_train/rejected": -10.123214721679688, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -134.3418731689453, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -158.85159301757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3841872215271, + "rewards_train/margins": 1.200972080230713, + "rewards_train/rejected": -5.5851593017578125, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -110.78468322753906, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -145.55213928222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2784683406352997, + "rewards_train/margins": 5.7267458736896515, + "rewards_train/rejected": -6.005214214324951, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -8.077308654785156, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -28.55362892150879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5233559012413025, + "rewards_train/margins": 1.6163819432258606, + "rewards_train/rejected": -2.139737844467163, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -27.148488998413086, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -43.00550079345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1648489087820053, + "rewards_train/margins": 0.21070118248462677, + "rewards_train/rejected": -0.3755500912666321, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -56.833824157714844, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -20.27311134338379, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7833824157714844, + "rewards_train/margins": -0.26857125759124756, + "rewards_train/rejected": -1.5148111581802368, + "step": 1859 + }, + { + "epoch": 0.52, + "logps_train/chosen": -82.44847869873047, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -106.24636840820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.944847822189331, + "rewards_train/margins": 1.4547889232635498, + "rewards_train/rejected": -4.399636745452881, + "step": 1859 + }, + { + "epoch": 0.52, + "learning_rate": 3.374487532286503e-07, + "loss": 0.3679, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -220.03367614746094, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -248.10971069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.703367710113525, + "rewards_train/margins": 0.20760345458984375, + "rewards_train/rejected": -7.910971164703369, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -141.18214416503906, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -102.35342407226562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.468214511871338, + "rewards_train/margins": -0.48287200927734375, + "rewards_train/rejected": -2.985342502593994, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -16.64614486694336, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -12.25008487701416, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7708644866943359, + "rewards_train/margins": 0.15101903676986694, + "rewards_train/rejected": -0.9218835234642029, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -84.27044677734375, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -104.64441680908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.127044677734375, + "rewards_train/margins": 0.33739709854125977, + "rewards_train/rejected": -3.4644417762756348, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -216.9637451171875, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -221.0, + "logps_train/rejected": -327.55548095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.09637451171875, + "rewards_train/margins": 6.559173583984375, + "rewards_train/rejected": -10.655548095703125, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -9.184089660644531, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -51.831878662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4902839660644531, + "rewards_train/margins": 2.005403995513916, + "rewards_train/rejected": -2.495687961578369, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -9.377479553222656, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -51.813716888427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5705604553222656, + "rewards_train/margins": 3.142061233520508, + "rewards_train/rejected": -3.7126216888427734, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -105.41014099121094, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -280.4150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0410140752792358, + "rewards_train/margins": 14.000489830970764, + "rewards_train/rejected": -15.04150390625, + "step": 1860 + }, + { + "epoch": 0.52, + "logps_train/chosen": -113.95108032226562, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -59.91944122314453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8951079845428467, + "rewards_train/margins": -0.4781637191772461, + "rewards_train/rejected": -2.4169442653656006, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -265.99407958984375, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -227.22625732421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.999407768249512, + "rewards_train/margins": -2.876781463623047, + "rewards_train/rejected": -9.122626304626465, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -106.62718200683594, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -126.67630004882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.012718200683594, + "rewards_train/margins": 1.6049118041992188, + "rewards_train/rejected": -5.6176300048828125, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -140.8780517578125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -135.75650024414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.337805271148682, + "rewards_train/margins": -1.012155294418335, + "rewards_train/rejected": -3.3256499767303467, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -140.89627075195312, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -209.18045043945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0896270275115967, + "rewards_train/margins": 3.428417921066284, + "rewards_train/rejected": -5.518044948577881, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -16.534181594848633, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -22.0072021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5784181952476501, + "rewards_train/margins": 0.041052043437957764, + "rewards_train/rejected": -0.6194702386856079, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -21.38411521911621, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -27.8411808013916, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1259115934371948, + "rewards_train/margins": 1.3832064867019653, + "rewards_train/rejected": -2.50911808013916, + "step": 1861 + }, + { + "epoch": 0.52, + "logps_train/chosen": -260.69635009765625, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -253.74603271484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.669634819030762, + "rewards_train/margins": -0.6950311660766602, + "rewards_train/rejected": -8.974603652954102, + "step": 1861 + }, + { + "epoch": 0.52, + "learning_rate": 3.354695120108667e-07, + "loss": 0.6446, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -229.559814453125, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -226.87380981445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.855981349945068, + "rewards_train/margins": 0.33139944076538086, + "rewards_train/rejected": -8.18738079071045, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -184.98190307617188, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -217.02896118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.648190498352051, + "rewards_train/margins": 3.854705810546875, + "rewards_train/rejected": -10.502896308898926, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -129.478759765625, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -227.9891815185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.397876024246216, + "rewards_train/margins": 4.801042318344116, + "rewards_train/rejected": -7.198918342590332, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -100.25697326660156, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -178.83258056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.325697422027588, + "rewards_train/margins": 4.557560920715332, + "rewards_train/rejected": -6.88325834274292, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -136.8699188232422, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -177.82003784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4369919300079346, + "rewards_train/margins": 3.195011854171753, + "rewards_train/rejected": -6.6320037841796875, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -3.1077356338500977, + "logps_train/ref_chosen": -1.84375, + "logps_train/ref_rejected": -1.84375, + "logps_train/rejected": -3.0061628818511963, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.12639856338500977, + "rewards_train/margins": -0.010157272219657898, + "rewards_train/rejected": -0.11624129116535187, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -0.008048931136727333, + "logps_train/ref_chosen": -0.057373046875, + "logps_train/ref_rejected": -0.057373046875, + "logps_train/rejected": -0.007227080874145031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0049324119463562965, + "rewards_train/margins": -8.21845605969429e-05, + "rewards_train/rejected": 0.0050145965069532394, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -23.244823455810547, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -29.237661361694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1682323217391968, + "rewards_train/margins": 1.3461588621139526, + "rewards_train/rejected": -2.5143911838531494, + "step": 1862 + }, + { + "epoch": 0.52, + "logps_train/chosen": -106.01945495605469, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -172.70260620117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.351945638656616, + "rewards_train/margins": 1.7183148860931396, + "rewards_train/rejected": -4.070260524749756, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -8.448690414428711, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -16.895151138305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6401815414428711, + "rewards_train/margins": 0.28683358430862427, + "rewards_train/rejected": -0.9270151257514954, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -16.949764251708984, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -21.569847106933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05747642740607262, + "rewards_train/margins": 1.5963833071291447, + "rewards_train/rejected": -1.6538597345352173, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -122.2456283569336, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -139.35385131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9745628833770752, + "rewards_train/margins": 0.41082239151000977, + "rewards_train/rejected": -2.385385274887085, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -6.403318405151367, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -21.766817092895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07783184200525284, + "rewards_train/margins": 1.51759984344244, + "rewards_train/rejected": -1.5954316854476929, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -22.845386505126953, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -1.2421875, + "logps_train/rejected": -19.250099182128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6189136505126953, + "rewards_train/margins": 0.1818774938583374, + "rewards_train/rejected": -1.8007911443710327, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -72.37107849121094, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -71.10275268554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5371078848838806, + "rewards_train/margins": 4.448167383670807, + "rewards_train/rejected": -4.9852752685546875, + "step": 1863 + }, + { + "epoch": 0.52, + "logps_train/chosen": -252.21963500976562, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -259.4017028808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.221963882446289, + "rewards_train/margins": 3.5182065963745117, + "rewards_train/rejected": -11.7401704788208, + "step": 1863 + }, + { + "epoch": 0.52, + "learning_rate": 3.334949218019366e-07, + "loss": 0.2816, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -198.01388549804688, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -131.7034912109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.301388740539551, + "rewards_train/margins": -0.6310396194458008, + "rewards_train/rejected": -5.67034912109375, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -275.7923889160156, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -238.0239715576172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.979239463806152, + "rewards_train/margins": -2.2768421173095703, + "rewards_train/rejected": -9.702397346496582, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -20.613203048706055, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -24.64691925048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.31132030487060547, + "rewards_train/margins": -0.10912837088108063, + "rewards_train/rejected": -0.20219193398952484, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -160.35589599609375, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -152.91490173339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.485589504241943, + "rewards_train/margins": 0.4059009552001953, + "rewards_train/rejected": -6.891490459442139, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -182.14190673828125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -254.04135131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.514190673828125, + "rewards_train/margins": 6.589944839477539, + "rewards_train/rejected": -11.104135513305664, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -25.700923919677734, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -11.272247314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8419674634933472, + "rewards_train/margins": -1.0397427082061768, + "rewards_train/rejected": -0.8022247552871704, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -158.48574829101562, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -225.48147583007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8985748291015625, + "rewards_train/margins": 2.8495726585388184, + "rewards_train/rejected": -6.748147487640381, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -16.415939331054688, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -38.60161209106445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1353439092636108, + "rewards_train/margins": 1.0998173952102661, + "rewards_train/rejected": -2.235161304473877, + "step": 1864 + }, + { + "epoch": 0.52, + "logps_train/chosen": -34.63063049316406, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -90.77894592285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.46193695068359375, + "rewards_train/margins": 1.13983154296875, + "rewards_train/rejected": -0.6778945922851562, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -19.759077072143555, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -31.264606475830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1509077548980713, + "rewards_train/margins": 0.6818028688430786, + "rewards_train/rejected": -1.83271062374115, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -15.296401023864746, + "logps_train/ref_chosen": -1.3984375, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -11.942699432373047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3897963762283325, + "rewards_train/margins": -0.47833889722824097, + "rewards_train/rejected": -0.9114574790000916, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -0.20800168812274933, + "logps_train/ref_chosen": -0.48828125, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -5.251762866973877, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028027957305312157, + "rewards_train/margins": 0.09382924251258373, + "rewards_train/rejected": -0.06580128520727158, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -19.264368057250977, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -47.36274719238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6326867938041687, + "rewards_train/margins": 1.5660880208015442, + "rewards_train/rejected": -2.198774814605713, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -59.138450622558594, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -52.96410369873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.063845157623291, + "rewards_train/margins": -0.4674346446990967, + "rewards_train/rejected": -3.5964105129241943, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -58.34042739868164, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -43.74094009399414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4340426921844482, + "rewards_train/margins": -0.6224486827850342, + "rewards_train/rejected": -1.811594009399414, + "step": 1865 + }, + { + "epoch": 0.52, + "logps_train/chosen": -33.067787170410156, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -45.96501541137695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2567787170410156, + "rewards_train/margins": 1.6897227764129639, + "rewards_train/rejected": -2.9465014934539795, + "step": 1865 + }, + { + "epoch": 0.52, + "learning_rate": 3.315249964218976e-07, + "loss": 0.6899, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -31.47589874267578, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -32.663352966308594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.335089921951294, + "rewards_train/margins": -0.24375462532043457, + "rewards_train/rejected": -1.0913352966308594, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -3.430070161819458, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -9.653314590454102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06019451841711998, + "rewards_train/margins": 0.38326195254921913, + "rewards_train/rejected": -0.4434564709663391, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -21.12229347229004, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -17.897308349609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5059794187545776, + "rewards_train/margins": -0.13187360763549805, + "rewards_train/rejected": -1.3741058111190796, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -83.9358901977539, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -61.5432243347168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.743589162826538, + "rewards_train/margins": 1.6982333660125732, + "rewards_train/rejected": -4.441822528839111, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -26.021968841552734, + "logps_train/ref_chosen": -1.953125, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -26.581626892089844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4068844318389893, + "rewards_train/margins": -0.07372164726257324, + "rewards_train/rejected": -2.333162784576416, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -97.87269592285156, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -126.24119567871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3872696161270142, + "rewards_train/margins": 1.536850094795227, + "rewards_train/rejected": -2.924119710922241, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -70.41637420654297, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -59.142295837402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7916374206542969, + "rewards_train/margins": -0.052407801151275635, + "rewards_train/rejected": -0.7392296195030212, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -47.89808654785156, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -53.707523345947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5023086071014404, + "rewards_train/margins": 0.13094377517700195, + "rewards_train/rejected": -2.6332523822784424, + "step": 1866 + }, + { + "epoch": 0.52, + "logps_train/chosen": -26.603256225585938, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -53.37611389160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3228256702423096, + "rewards_train/margins": 1.3897857666015625, + "rewards_train/rejected": -2.712611436843872, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -146.3017578125, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -269.7217712402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.230175971984863, + "rewards_train/margins": 5.842000961303711, + "rewards_train/rejected": -11.072176933288574, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -122.0528564453125, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -162.2022705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2052857875823975, + "rewards_train/margins": 2.314941167831421, + "rewards_train/rejected": -4.520226955413818, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -98.66014862060547, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -130.46780395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8160148859024048, + "rewards_train/margins": 1.5807656049728394, + "rewards_train/rejected": -3.396780490875244, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -18.858322143554688, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -4.307811737060547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4295822083950043, + "rewards_train/margins": -0.30973853170871735, + "rewards_train/rejected": -0.11984367668628693, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -6.904969215393066, + "logps_train/ref_chosen": -1.1875, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -9.69050407409668, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5717469453811646, + "rewards_train/margins": -0.018321514129638672, + "rewards_train/rejected": -0.5534254312515259, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -14.271614074707031, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -2.203125, + "logps_train/rejected": -13.623786926269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1974738836288452, + "rewards_train/margins": -0.05540764331817627, + "rewards_train/rejected": -1.142066240310669, + "step": 1867 + }, + { + "epoch": 0.52, + "logps_train/chosen": -59.29286193847656, + "logps_train/ref_chosen": -46.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -81.09851837158203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.27928626537323, + "rewards_train/margins": -0.11943435668945312, + "rewards_train/rejected": -1.1598519086837769, + "step": 1867 + }, + { + "epoch": 0.52, + "learning_rate": 3.295597496581395e-07, + "loss": 0.5056, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -172.8386688232422, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -210.0, + "logps_train/rejected": -262.94476318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8838669061660767, + "rewards_train/margins": 3.4106096029281616, + "rewards_train/rejected": -5.294476509094238, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -39.38507843017578, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -83.68448638916016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.738507866859436, + "rewards_train/margins": -0.07005923986434937, + "rewards_train/rejected": -0.6684486269950867, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -24.459762573242188, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -79.68650817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0022262334823608, + "rewards_train/margins": 2.016424536705017, + "rewards_train/rejected": -3.018650770187378, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -142.19406127929688, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -197.57530212402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.819406032562256, + "rewards_train/margins": 2.938124179840088, + "rewards_train/rejected": -7.757530212402344, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -83.21694946289062, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -158.5584259033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1216949224472046, + "rewards_train/margins": 5.084147572517395, + "rewards_train/rejected": -6.2058424949646, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -176.31521606445312, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -121.76016235351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.931521892547607, + "rewards_train/margins": -0.4055056571960449, + "rewards_train/rejected": -4.5260162353515625, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -40.509700775146484, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -19.583301544189453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.04472017288208, + "rewards_train/margins": -1.341077446937561, + "rewards_train/rejected": -1.703642725944519, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -137.01144409179688, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -154.1680145263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30114442110061646, + "rewards_train/margins": 3.4156569838523865, + "rewards_train/rejected": -3.716801404953003, + "step": 1868 + }, + { + "epoch": 0.52, + "logps_train/chosen": -119.94767761230469, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -136.79397583007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.694767713546753, + "rewards_train/margins": 0.584630012512207, + "rewards_train/rejected": -3.27939772605896, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -198.78131103515625, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -195.7386474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0781311988830566, + "rewards_train/margins": 4.195733547210693, + "rewards_train/rejected": -7.27386474609375, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -276.0391845703125, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -262.5310974121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.403918266296387, + "rewards_train/margins": -2.2508087158203125, + "rewards_train/rejected": -8.153109550476074, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -10.019257545471191, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -40.990966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2925507724285126, + "rewards_train/margins": 3.181546002626419, + "rewards_train/rejected": -3.4740967750549316, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -0.7156500220298767, + "logps_train/ref_chosen": -0.640625, + "logps_train/ref_rejected": -0.640625, + "logps_train/rejected": -0.9184545278549194, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.007502502296119928, + "rewards_train/margins": 0.020280450116842985, + "rewards_train/rejected": -0.027782952412962914, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -10.707268714904785, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -44.29459762573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5613518953323364, + "rewards_train/margins": 2.8931080102920532, + "rewards_train/rejected": -3.4544599056243896, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -217.77642822265625, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -151.88363647460938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.37764310836792, + "rewards_train/margins": -0.13927936553955078, + "rewards_train/rejected": -4.238363742828369, + "step": 1869 + }, + { + "epoch": 0.52, + "logps_train/chosen": -179.7767333984375, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -220.53494262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2776734828948975, + "rewards_train/margins": 6.025820970535278, + "rewards_train/rejected": -9.303494453430176, + "step": 1869 + }, + { + "epoch": 0.52, + "learning_rate": 3.275991952653053e-07, + "loss": 0.4888, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -31.061599731445312, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -41.98235321044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5124099254608154, + "rewards_train/margins": 1.2264504432678223, + "rewards_train/rejected": -3.7388603687286377, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -8.452329635620117, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -23.511655807495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4858579635620117, + "rewards_train/margins": 1.2403076887130737, + "rewards_train/rejected": -1.7261656522750854, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -9.071756362915039, + "logps_train/ref_chosen": -1.9921875, + "logps_train/ref_rejected": -2.765625, + "logps_train/rejected": -31.538421630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7079569101333618, + "rewards_train/margins": 2.1693228483200073, + "rewards_train/rejected": -2.877279758453369, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -6.278946876525879, + "logps_train/ref_chosen": -1.6015625, + "logps_train/ref_rejected": -11.5, + "logps_train/rejected": -37.722084045410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46773844957351685, + "rewards_train/margins": 2.154469907283783, + "rewards_train/rejected": -2.6222083568573, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -18.574914932250977, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -1.625, + "logps_train/rejected": -19.27184295654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5184290409088135, + "rewards_train/margins": 0.2462552785873413, + "rewards_train/rejected": -1.7646843194961548, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -91.20165252685547, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -148.9995574951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.845165252685547, + "rewards_train/margins": 3.804790496826172, + "rewards_train/rejected": -9.649955749511719, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -116.19009399414062, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -255.53955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4690093994140625, + "rewards_train/margins": 11.834945678710938, + "rewards_train/rejected": -13.303955078125, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -86.28429412841797, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -89.53860473632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8284294009208679, + "rewards_train/margins": 0.7254311442375183, + "rewards_train/rejected": -1.5538605451583862, + "step": 1870 + }, + { + "epoch": 0.52, + "logps_train/chosen": -237.76145935058594, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -206.80838012695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.076146125793457, + "rewards_train/margins": -0.6953082084655762, + "rewards_train/rejected": -7.380837917327881, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -17.778339385986328, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -8.091797828674316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1153339371085167, + "rewards_train/margins": -0.33740415424108505, + "rewards_train/rejected": 0.22207021713256836, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -31.844404220581055, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -48.51637649536133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7000653743743896, + "rewards_train/margins": 0.3265724182128906, + "rewards_train/rejected": -3.0266377925872803, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -20.504661560058594, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -3.328125, + "logps_train/rejected": -8.656006813049316, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6285911798477173, + "rewards_train/margins": -1.0958029627799988, + "rewards_train/rejected": -0.5327882170677185, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -16.830259323120117, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -1.2734375, + "logps_train/rejected": -1.3875523805618286, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.43302592635154724, + "rewards_train/margins": -0.4216144382953644, + "rewards_train/rejected": -0.011411488056182861, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -15.863101959228516, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -34.67198181152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6925601959228516, + "rewards_train/margins": 1.8246381282806396, + "rewards_train/rejected": -2.517198324203491, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -68.4488525390625, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -79.01750183105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19488525390625, + "rewards_train/margins": 3.6818649768829346, + "rewards_train/rejected": -3.8767502307891846, + "step": 1871 + }, + { + "epoch": 0.52, + "logps_train/chosen": -150.48757934570312, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -249.07159423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.548758029937744, + "rewards_train/margins": 5.958401203155518, + "rewards_train/rejected": -8.507159233093262, + "step": 1871 + }, + { + "epoch": 0.52, + "learning_rate": 3.256433469651978e-07, + "loss": 0.4204, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -4.953428268432617, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -20.793642044067383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05159282684326172, + "rewards_train/margins": 1.5152714252471924, + "rewards_train/rejected": -1.566864252090454, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -123.38139343261719, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -194.43873596191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.138139486312866, + "rewards_train/margins": 8.455734491348267, + "rewards_train/rejected": -10.593873977661133, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -79.36323547363281, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -94.16964721679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0363235473632812, + "rewards_train/margins": 1.0306413173675537, + "rewards_train/rejected": -2.066964864730835, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -162.91119384765625, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -203.0, + "logps_train/rejected": -285.7884826660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6411194801330566, + "rewards_train/margins": 4.637729167938232, + "rewards_train/rejected": -8.278848648071289, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -21.45830535888672, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -77.34786987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7973930835723877, + "rewards_train/margins": 2.8248941898345947, + "rewards_train/rejected": -4.622287273406982, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -153.50018310546875, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -153.99972534179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.850018262863159, + "rewards_train/margins": 0.04995441436767578, + "rewards_train/rejected": -2.899972677230835, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -89.8615493774414, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -138.57589721679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.311155080795288, + "rewards_train/margins": 2.0964348316192627, + "rewards_train/rejected": -5.407589912414551, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -19.33915138244629, + "logps_train/ref_chosen": -2.484375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -30.952129364013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.685477614402771, + "rewards_train/margins": 0.05973529815673828, + "rewards_train/rejected": -1.7452129125595093, + "step": 1872 + }, + { + "epoch": 0.52, + "logps_train/chosen": -203.89599609375, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -191.87359619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.189599514007568, + "rewards_train/margins": 1.5977602005004883, + "rewards_train/rejected": -7.787359714508057, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -55.465789794921875, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -138.71347045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0715789794921875, + "rewards_train/margins": 1.69976806640625, + "rewards_train/rejected": -2.7713470458984375, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -7.376001358032227, + "logps_train/ref_chosen": -0.404296875, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -19.043981552124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6971704363822937, + "rewards_train/margins": 1.0041027665138245, + "rewards_train/rejected": -1.7012732028961182, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -22.341012954711914, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -33.54695129394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9028512835502625, + "rewards_train/margins": 1.889343798160553, + "rewards_train/rejected": -2.7921950817108154, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -53.29975509643555, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -53.15475845336914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.24502448737621307, + "rewards_train/margins": -0.014499679207801819, + "rewards_train/rejected": 0.2595241665840149, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -69.0060806274414, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -83.05924987792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35060808062553406, + "rewards_train/margins": 0.8053169548511505, + "rewards_train/rejected": -1.1559250354766846, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -3.336859703063965, + "logps_train/ref_chosen": -0.38671875, + "logps_train/ref_rejected": -0.38671875, + "logps_train/rejected": -3.748155355453491, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2950141131877899, + "rewards_train/margins": 0.04112955927848816, + "rewards_train/rejected": -0.3361436724662781, + "step": 1873 + }, + { + "epoch": 0.52, + "logps_train/chosen": -206.18673706054688, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -253.1860809326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.518673896789551, + "rewards_train/margins": 5.999934196472168, + "rewards_train/rejected": -12.518608093261719, + "step": 1873 + }, + { + "epoch": 0.52, + "learning_rate": 3.236922184466806e-07, + "loss": 0.2853, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -98.37092590332031, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -24.484210968017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7370926141738892, + "rewards_train/margins": 0.4753910303115845, + "rewards_train/rejected": -2.2124836444854736, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -11.526984214782715, + "logps_train/ref_chosen": -7.5, + "logps_train/ref_rejected": -3.1875, + "logps_train/rejected": -13.095474243164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40269842743873596, + "rewards_train/margins": 0.5880990326404572, + "rewards_train/rejected": -0.9907974600791931, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -104.00230407714844, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -142.20541381835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.200230360031128, + "rewards_train/margins": 1.720311164855957, + "rewards_train/rejected": -3.920541524887085, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -238.96127319335938, + "logps_train/ref_chosen": -210.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -227.5531005859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.896127462387085, + "rewards_train/margins": -0.640817403793335, + "rewards_train/rejected": -2.25531005859375, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -14.619192123413086, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -45.25548553466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6712942123413086, + "rewards_train/margins": 0.5292543172836304, + "rewards_train/rejected": -1.200548529624939, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -272.974365234375, + "logps_train/ref_chosen": -205.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -283.9415588378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.797436714172363, + "rewards_train/margins": 4.796719551086426, + "rewards_train/rejected": -11.594156265258789, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -91.33634948730469, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -181.4483184814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0836349725723267, + "rewards_train/margins": 5.361196875572205, + "rewards_train/rejected": -6.444831848144531, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -53.65613555908203, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -52.498321533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6906135678291321, + "rewards_train/margins": 3.302968680858612, + "rewards_train/rejected": -3.993582248687744, + "step": 1874 + }, + { + "epoch": 0.52, + "logps_train/chosen": -8.385953903198242, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -31.388187408447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2948454022407532, + "rewards_train/margins": 1.5877233147621155, + "rewards_train/rejected": -1.8825687170028687, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -6.31980562210083, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -5.294236660003662, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5108868479728699, + "rewards_train/margins": -0.2877131849527359, + "rewards_train/rejected": -0.22317366302013397, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -195.30886840820312, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -183.31329345703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.030887126922607, + "rewards_train/margins": -0.5995578765869141, + "rewards_train/rejected": -5.431329250335693, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -232.82064819335938, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -228.9197540283203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.782065391540527, + "rewards_train/margins": -0.3900899887084961, + "rewards_train/rejected": -9.391975402832031, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -7.815924167633057, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -40.2353630065918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27846741676330566, + "rewards_train/margins": 2.15756893157959, + "rewards_train/rejected": -2.4360363483428955, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -17.560497283935547, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -31.964698791503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5935497283935547, + "rewards_train/margins": 1.3904201984405518, + "rewards_train/rejected": -1.9839699268341064, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -77.00637817382812, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -47.20926284790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9506378173828125, + "rewards_train/margins": 1.432788610458374, + "rewards_train/rejected": -2.3834264278411865, + "step": 1875 + }, + { + "epoch": 0.52, + "logps_train/chosen": -145.1859130859375, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -249.80960083007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9685914516448975, + "rewards_train/margins": 7.912369012832642, + "rewards_train/rejected": -10.880960464477539, + "step": 1875 + }, + { + "epoch": 0.52, + "learning_rate": 3.2174582336558553e-07, + "loss": 0.3874, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -123.10269165039062, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -165.69876098632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7102692127227783, + "rewards_train/margins": 1.3096067905426025, + "rewards_train/rejected": -5.019876003265381, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -92.37872314453125, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -171.7482452392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9878723621368408, + "rewards_train/margins": 3.8369524478912354, + "rewards_train/rejected": -5.824824810028076, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -10.460046768188477, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -1.859375, + "logps_train/rejected": -3.1168909072875977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.38975468277931213, + "rewards_train/margins": -0.26400308310985565, + "rewards_train/rejected": -0.12575159966945648, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -78.95606231689453, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -127.14614868164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8956062197685242, + "rewards_train/margins": 3.6690085530281067, + "rewards_train/rejected": -4.564614772796631, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -85.78144836425781, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -86.18511962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.728144884109497, + "rewards_train/margins": 0.04036712646484375, + "rewards_train/rejected": -1.7685120105743408, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -249.17564392089844, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -257.1418151855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -12.417564392089844, + "rewards_train/margins": 0.7966175079345703, + "rewards_train/rejected": -13.214181900024414, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -27.623167037963867, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -63.70353698730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3091917037963867, + "rewards_train/margins": 1.998661994934082, + "rewards_train/rejected": -4.307853698730469, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -32.56521224975586, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -131.78778076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.756521224975586, + "rewards_train/margins": 2.772256851196289, + "rewards_train/rejected": -4.528778076171875, + "step": 1876 + }, + { + "epoch": 0.52, + "logps_train/chosen": -2.616503953933716, + "logps_train/ref_chosen": -0.53515625, + "logps_train/ref_rejected": -3.34375, + "logps_train/rejected": -13.06549072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20813477039337158, + "rewards_train/margins": 0.7640393376350403, + "rewards_train/rejected": -0.9721741080284119, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -12.335211753845215, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -29.036224365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8585211634635925, + "rewards_train/margins": 1.5919762253761292, + "rewards_train/rejected": -2.4504973888397217, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -37.81153869628906, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -1.9453125, + "logps_train/rejected": -20.092466354370117, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5061538219451904, + "rewards_train/margins": -0.6914384365081787, + "rewards_train/rejected": -1.8147153854370117, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -25.537965774536133, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -73.53816223144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4412965774536133, + "rewards_train/margins": 1.23751962184906, + "rewards_train/rejected": -1.6788161993026733, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -114.38045501708984, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -154.82171630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7880455255508423, + "rewards_train/margins": 6.044126391410828, + "rewards_train/rejected": -7.83217191696167, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -42.737674713134766, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -43.64171600341797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6800174713134766, + "rewards_train/margins": -0.015845775604248047, + "rewards_train/rejected": -2.6641716957092285, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -47.75246047973633, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -48.10924530029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6252460479736328, + "rewards_train/margins": 0.03567850589752197, + "rewards_train/rejected": -0.6609245538711548, + "step": 1877 + }, + { + "epoch": 0.52, + "logps_train/chosen": -90.17155456542969, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -174.27252197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8171554803848267, + "rewards_train/margins": 6.160097002983093, + "rewards_train/rejected": -7.97725248336792, + "step": 1877 + }, + { + "epoch": 0.52, + "learning_rate": 3.1980417534461403e-07, + "loss": 0.3532, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -34.322044372558594, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -197.37420654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4322044551372528, + "rewards_train/margins": 6.5552161037921906, + "rewards_train/rejected": -6.987420558929443, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -177.4575958251953, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -247.78387451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7457597255706787, + "rewards_train/margins": 4.532628297805786, + "rewards_train/rejected": -8.278388023376465, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -134.00347900390625, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -192.89947509765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6003479361534119, + "rewards_train/margins": 6.5395994782447815, + "rewards_train/rejected": -7.139947414398193, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -69.33726501464844, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -59.50178527832031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.033726453781128, + "rewards_train/margins": -0.9835479259490967, + "rewards_train/rejected": -2.0501785278320312, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -160.3262176513672, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -180.68243408203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.682621955871582, + "rewards_train/margins": -0.6143784523010254, + "rewards_train/rejected": -4.068243503570557, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -108.06437683105469, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -107.78376770019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.206437826156616, + "rewards_train/margins": 0.02193903923034668, + "rewards_train/rejected": -2.228376865386963, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -66.56964111328125, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -83.16285705566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7069641351699829, + "rewards_train/margins": -0.49067842960357666, + "rewards_train/rejected": -0.21628570556640625, + "step": 1878 + }, + { + "epoch": 0.52, + "logps_train/chosen": -156.7858428955078, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -154.82040405273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.82858419418335, + "rewards_train/margins": 1.2534561157226562, + "rewards_train/rejected": -6.082040309906006, + "step": 1878 + }, + { + "epoch": 0.53, + "logps_train/chosen": -31.91745948791504, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -28.97931480407715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8104959726333618, + "rewards_train/margins": 0.624935507774353, + "rewards_train/rejected": -2.435431480407715, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -129.37692260742188, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -151.10096740722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6376922130584717, + "rewards_train/margins": 0.3724048137664795, + "rewards_train/rejected": -4.010097026824951, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -17.932598114013672, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -1.21875, + "logps_train/rejected": -19.78029441833496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05575981363654137, + "rewards_train/margins": 1.8003946281969547, + "rewards_train/rejected": -1.856154441833496, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -197.22979736328125, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -246.92723083496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.522979736328125, + "rewards_train/margins": 7.969743728637695, + "rewards_train/rejected": -11.49272346496582, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -142.18154907226562, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -160.18780517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8681548833847046, + "rewards_train/margins": 4.550625920295715, + "rewards_train/rejected": -6.41878080368042, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -48.30432891845703, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -100.87675476074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.905432939529419, + "rewards_train/margins": 0.18224263191223145, + "rewards_train/rejected": -2.0876755714416504, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -121.83299255371094, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -152.90541076660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.783299446105957, + "rewards_train/margins": 0.7572417259216309, + "rewards_train/rejected": -5.540541172027588, + "step": 1879 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.265533447265625, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -73.79345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03905334696173668, + "rewards_train/margins": 3.440292451530695, + "rewards_train/rejected": -3.4793457984924316, + "step": 1879 + }, + { + "epoch": 0.53, + "learning_rate": 3.178672879732435e-07, + "loss": 0.4002, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -30.43314552307129, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -57.16515350341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.290189504623413, + "rewards_train/margins": 2.288825750350952, + "rewards_train/rejected": -4.579015254974365, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -108.2708969116211, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -105.25653076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8770896792411804, + "rewards_train/margins": 2.4985633492469788, + "rewards_train/rejected": -3.375653028488159, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -19.8980770111084, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -69.21134948730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6710577011108398, + "rewards_train/margins": 5.300077438354492, + "rewards_train/rejected": -5.971135139465332, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -139.29115295410156, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -217.70326232910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.829115390777588, + "rewards_train/margins": 4.691211223602295, + "rewards_train/rejected": -9.520326614379883, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -153.5603790283203, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -142.29013061523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6560379266738892, + "rewards_train/margins": 0.27297520637512207, + "rewards_train/rejected": -1.9290131330490112, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -15.299604415893555, + "logps_train/ref_chosen": -1.7109375, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -46.714927673339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3588666915893555, + "rewards_train/margins": 0.8501260280609131, + "rewards_train/rejected": -2.2089927196502686, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -103.0934066772461, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -210.3408966064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0093406438827515, + "rewards_train/margins": 8.67474901676178, + "rewards_train/rejected": -9.684089660644531, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -5.017747402191162, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -14.250795364379883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11385025829076767, + "rewards_train/margins": 0.17642979323863983, + "rewards_train/rejected": -0.06257953494787216, + "step": 1880 + }, + { + "epoch": 0.53, + "logps_train/chosen": -147.70883178710938, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -169.79925537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4708831310272217, + "rewards_train/margins": 0.509042501449585, + "rewards_train/rejected": -2.9799256324768066, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -21.4421329498291, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -15.728494644165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9504632949829102, + "rewards_train/margins": 0.40363621711730957, + "rewards_train/rejected": -1.3540995121002197, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -142.36358642578125, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -172.82308959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.486358642578125, + "rewards_train/margins": 3.445950508117676, + "rewards_train/rejected": -6.932309150695801, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -67.07960510253906, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -98.11824035644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3329605162143707, + "rewards_train/margins": 1.1788634955883026, + "rewards_train/rejected": -1.5118240118026733, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -26.83580780029297, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -24.840946197509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6335808038711548, + "rewards_train/margins": 1.588013768196106, + "rewards_train/rejected": -2.2215945720672607, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -32.282806396484375, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -28.066509246826172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3657805919647217, + "rewards_train/margins": -0.46537959575653076, + "rewards_train/rejected": -1.900400996208191, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -25.494930267333984, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -59.587615966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8619930148124695, + "rewards_train/margins": 2.559268534183502, + "rewards_train/rejected": -3.4212615489959717, + "step": 1881 + }, + { + "epoch": 0.53, + "logps_train/chosen": -159.09786987304688, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -166.20623779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9097869992256165, + "rewards_train/margins": 3.810836970806122, + "rewards_train/rejected": -4.720623970031738, + "step": 1881 + }, + { + "epoch": 0.53, + "learning_rate": 3.1593517480763234e-07, + "loss": 0.2648, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -149.3446044921875, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -205.2242431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.934460401535034, + "rewards_train/margins": 4.1879637241363525, + "rewards_train/rejected": -8.122424125671387, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -64.41763305664062, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -195.8396453857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6917633414268494, + "rewards_train/margins": 8.44220119714737, + "rewards_train/rejected": -9.133964538574219, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -41.17497634887695, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -36.045467376708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3424975872039795, + "rewards_train/margins": 0.7745492458343506, + "rewards_train/rejected": -3.11704683303833, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -21.862869262695312, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -3.296875, + "logps_train/rejected": -10.021244049072266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8050369620323181, + "rewards_train/margins": -0.1326000690460205, + "rewards_train/rejected": -0.6724368929862976, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -37.579673767089844, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -78.15787506103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8579673767089844, + "rewards_train/margins": 3.3078203201293945, + "rewards_train/rejected": -4.165787696838379, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -86.14614868164062, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -144.78872680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7146148681640625, + "rewards_train/margins": 1.9642579555511475, + "rewards_train/rejected": -2.67887282371521, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -265.516845703125, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -262.6439514160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.3516845703125, + "rewards_train/margins": -0.2872896194458008, + "rewards_train/rejected": -8.0643949508667, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -101.27589416503906, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -163.89358520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8275893926620483, + "rewards_train/margins": 3.311769127845764, + "rewards_train/rejected": -5.1393585205078125, + "step": 1882 + }, + { + "epoch": 0.53, + "logps_train/chosen": -72.67478942871094, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -77.90591430664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0924789905548096, + "rewards_train/margins": 1.0231125354766846, + "rewards_train/rejected": -3.115591526031494, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -96.9007568359375, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -172.38670349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.990075707435608, + "rewards_train/margins": 5.798594832420349, + "rewards_train/rejected": -7.788670539855957, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -20.595069885253906, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -36.515220642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9345070123672485, + "rewards_train/margins": 2.301390051841736, + "rewards_train/rejected": -3.2358970642089844, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -134.044921875, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -147.37274169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.304492473602295, + "rewards_train/margins": 2.7327818870544434, + "rewards_train/rejected": -7.037274360656738, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -63.85204315185547, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -106.42012023925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2602043151855469, + "rewards_train/margins": 2.081807851791382, + "rewards_train/rejected": -3.3420121669769287, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -133.1172637939453, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -117.00494384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5617263317108154, + "rewards_train/margins": 0.03876805305480957, + "rewards_train/rejected": -3.600494384765625, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -114.45419311523438, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -219.47637939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2454192638397217, + "rewards_train/margins": 7.452219247817993, + "rewards_train/rejected": -9.697638511657715, + "step": 1883 + }, + { + "epoch": 0.53, + "logps_train/chosen": -70.53247833251953, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -58.98899841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10324783623218536, + "rewards_train/margins": 1.0206519812345505, + "rewards_train/rejected": -1.1238998174667358, + "step": 1883 + }, + { + "epoch": 0.53, + "learning_rate": 3.1400784937052373e-07, + "loss": 0.236, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -197.35223388671875, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -199.20156860351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.635223388671875, + "rewards_train/margins": 2.684933662414551, + "rewards_train/rejected": -7.320157051086426, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -36.34665298461914, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -55.420631408691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.47216534614563, + "rewards_train/margins": 2.360522985458374, + "rewards_train/rejected": -4.832688331604004, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -31.260526657104492, + "logps_train/ref_chosen": -1.6015625, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -10.600250244140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9658963680267334, + "rewards_train/margins": -2.160558819770813, + "rewards_train/rejected": -0.8053375482559204, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -39.36265182495117, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -3.40625, + "logps_train/rejected": -3.872069835662842, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.636265218257904, + "rewards_train/margins": -0.5896832346916199, + "rewards_train/rejected": -0.04658198356628418, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -42.50676727294922, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -33.54833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.238176703453064, + "rewards_train/margins": 0.07915735244750977, + "rewards_train/rejected": -1.3173340559005737, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -63.432037353515625, + "logps_train/ref_chosen": -44.25, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -152.81494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9182037115097046, + "rewards_train/margins": 2.763290524482727, + "rewards_train/rejected": -4.681494235992432, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -64.54166412353516, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -217.80332946777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17916642129421234, + "rewards_train/margins": 9.351167097687721, + "rewards_train/rejected": -9.530333518981934, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -191.23635864257812, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -185.57781982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7236359119415283, + "rewards_train/margins": 2.2341461181640625, + "rewards_train/rejected": -3.957782030105591, + "step": 1884 + }, + { + "epoch": 0.53, + "logps_train/chosen": -17.35028839111328, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -35.38692092895508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6037788391113281, + "rewards_train/margins": 1.5974133014678955, + "rewards_train/rejected": -2.2011921405792236, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.843240737915039, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -54.66790771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1531759351491928, + "rewards_train/margins": 4.4824668020009995, + "rewards_train/rejected": -4.329290866851807, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -85.83612060546875, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -27.450374603271484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6336121559143066, + "rewards_train/margins": -1.8135746717453003, + "rewards_train/rejected": -1.8200374841690063, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -6.9128828048706055, + "logps_train/ref_chosen": -1.4765625, + "logps_train/ref_rejected": -0.96875, + "logps_train/rejected": -1.554335117340088, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5436320304870605, + "rewards_train/margins": -0.4850735180079937, + "rewards_train/rejected": -0.05855851247906685, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -43.075523376464844, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -42.535003662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.082552433013916, + "rewards_train/margins": 0.4521980285644531, + "rewards_train/rejected": -3.534750461578369, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -90.68426513671875, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -133.4208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.018426537513733, + "rewards_train/margins": 1.7736634016036987, + "rewards_train/rejected": -2.7920899391174316, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -32.75614547729492, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -93.13379669189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.63811457157135, + "rewards_train/margins": 2.975265383720398, + "rewards_train/rejected": -4.613379955291748, + "step": 1885 + }, + { + "epoch": 0.53, + "logps_train/chosen": -60.53081130981445, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -54.58039093017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.290581226348877, + "rewards_train/margins": 0.7487077713012695, + "rewards_train/rejected": -4.0392889976501465, + "step": 1885 + }, + { + "epoch": 0.53, + "learning_rate": 3.1208532515115294e-07, + "loss": 0.5302, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -114.17066192626953, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -113.53014373779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6170662045478821, + "rewards_train/margins": 3.9859482645988464, + "rewards_train/rejected": -4.6030144691467285, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -228.1072998046875, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -264.4378967285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.31072998046875, + "rewards_train/margins": 4.333060264587402, + "rewards_train/rejected": -12.643790245056152, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -121.54183197021484, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -115.11131286621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.004183292388916, + "rewards_train/margins": -0.1430518627166748, + "rewards_train/rejected": -2.861131429672241, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -144.52560424804688, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -178.82666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1525604724884033, + "rewards_train/margins": 3.13010573387146, + "rewards_train/rejected": -6.282666206359863, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -15.655288696289062, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -27.70159912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6905288696289062, + "rewards_train/margins": 1.2608810663223267, + "rewards_train/rejected": -1.951409935951233, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -5.220109939575195, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -6.923429012298584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07173901051282883, + "rewards_train/margins": 0.23908191174268723, + "rewards_train/rejected": -0.1673429012298584, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -26.900903701782227, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -3.984375, + "logps_train/rejected": -36.745338439941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1463403701782227, + "rewards_train/margins": 1.129755973815918, + "rewards_train/rejected": -3.2760963439941406, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -71.46568298339844, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -57.79256820678711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.421568393707275, + "rewards_train/margins": -3.2673115730285645, + "rewards_train/rejected": -2.154256820678711, + "step": 1886 + }, + { + "epoch": 0.53, + "logps_train/chosen": -98.15560150146484, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -83.9384994506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9655601382255554, + "rewards_train/margins": 0.578289806842804, + "rewards_train/rejected": -1.5438499450683594, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -185.82659912109375, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -126.39793395996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.032660007476807, + "rewards_train/margins": -2.0428667068481445, + "rewards_train/rejected": -4.989793300628662, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -14.07564926147461, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -24.50164031982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42631492018699646, + "rewards_train/margins": 0.6738491356372833, + "rewards_train/rejected": -1.1001640558242798, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -47.87822341918945, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -86.6241455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.575322389602661, + "rewards_train/margins": 1.387092113494873, + "rewards_train/rejected": -3.962414503097534, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -93.41914367675781, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -182.86239624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5669143199920654, + "rewards_train/margins": 6.419325113296509, + "rewards_train/rejected": -9.986239433288574, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -48.393157958984375, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -65.49839782714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8268158435821533, + "rewards_train/margins": 0.9730238914489746, + "rewards_train/rejected": -2.799839735031128, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -138.41412353515625, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -218.84869384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9414123892784119, + "rewards_train/margins": 8.543457567691803, + "rewards_train/rejected": -9.484869956970215, + "step": 1887 + }, + { + "epoch": 0.53, + "logps_train/chosen": -25.966211318969727, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -102.26988220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7153711318969727, + "rewards_train/margins": 2.8366169929504395, + "rewards_train/rejected": -4.551988124847412, + "step": 1887 + }, + { + "epoch": 0.53, + "learning_rate": 3.101676156051506e-07, + "loss": 0.5551, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -172.38372802734375, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -172.19190979003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.06162719801068306, + "rewards_train/margins": -0.01918182149529457, + "rewards_train/rejected": 0.08080901950597763, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -43.36427307128906, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -34.653072357177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9614273309707642, + "rewards_train/margins": 1.435130000114441, + "rewards_train/rejected": -2.396557331085205, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -21.05972671508789, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -1.84375, + "logps_train/rejected": -20.488203048706055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.930972695350647, + "rewards_train/margins": 0.9334726333618164, + "rewards_train/rejected": -1.8644453287124634, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -33.49881362915039, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -12.216632843017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7873813509941101, + "rewards_train/margins": 0.15303194522857666, + "rewards_train/rejected": -0.9404132962226868, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -191.48768615722656, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -223.54336547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.948768615722656, + "rewards_train/margins": 1.4055681228637695, + "rewards_train/rejected": -7.354336738586426, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -69.38419342041016, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -110.5418701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38841935992240906, + "rewards_train/margins": 3.215767651796341, + "rewards_train/rejected": -3.60418701171875, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -17.70260238647461, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -19.687368392944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0015102624893188, + "rewards_train/margins": 0.6703516244888306, + "rewards_train/rejected": -1.6718618869781494, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -175.48492431640625, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -223.40713500976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.148492336273193, + "rewards_train/margins": 3.7922210693359375, + "rewards_train/rejected": -7.940713405609131, + "step": 1888 + }, + { + "epoch": 0.53, + "logps_train/chosen": -145.60049438476562, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -154.68878173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.460049629211426, + "rewards_train/margins": 1.3588285446166992, + "rewards_train/rejected": -5.818878173828125, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -55.82588195800781, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -1.8359375, + "logps_train/rejected": -22.2530517578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1325881481170654, + "rewards_train/margins": -1.090876579284668, + "rewards_train/rejected": -2.0417115688323975, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -8.625320434570312, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -7.116738319396973, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4640945494174957, + "rewards_train/margins": -0.28367070853710175, + "rewards_train/rejected": -0.18042384088039398, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -18.812620162963867, + "logps_train/ref_chosen": -0.98046875, + "logps_train/ref_rejected": -0.98046875, + "logps_train/rejected": -18.812631607055664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7832151651382446, + "rewards_train/margins": 1.1920928955078125e-06, + "rewards_train/rejected": -1.7832163572311401, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -3.0311009883880615, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -15.391814231872559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1804538518190384, + "rewards_train/margins": 1.1056025475263596, + "rewards_train/rejected": -1.286056399345398, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -14.798202514648438, + "logps_train/ref_chosen": -1.3515625, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -35.203025817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3446639776229858, + "rewards_train/margins": 0.45063865184783936, + "rewards_train/rejected": -1.7953026294708252, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -19.052059173583984, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -19.750429153442383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09270592033863068, + "rewards_train/margins": 0.4135870188474655, + "rewards_train/rejected": -0.5062929391860962, + "step": 1889 + }, + { + "epoch": 0.53, + "logps_train/chosen": -26.1640682220459, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -72.30152893066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.335156798362732, + "rewards_train/margins": 2.8949962854385376, + "rewards_train/rejected": -4.2301530838012695, + "step": 1889 + }, + { + "epoch": 0.53, + "learning_rate": 3.082547341544507e-07, + "loss": 0.441, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -25.655887603759766, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -25.081003189086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6780887842178345, + "rewards_train/margins": 0.6190739870071411, + "rewards_train/rejected": -2.2971627712249756, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -0.15601864457130432, + "logps_train/ref_chosen": -0.263671875, + "logps_train/ref_rejected": -0.263671875, + "logps_train/rejected": -0.1560162603855133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.010765323415398598, + "rewards_train/margins": -2.384185791015625e-07, + "rewards_train/rejected": 0.0107655618339777, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -69.39845275878906, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -111.16355895996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6101547479629517, + "rewards_train/margins": 0.8765106499195099, + "rewards_train/rejected": -0.2663559019565582, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -55.47800064086914, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -45.88665771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8228000402450562, + "rewards_train/margins": 2.153365731239319, + "rewards_train/rejected": -3.976165771484375, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -168.11355590820312, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -202.9026641845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8113555908203125, + "rewards_train/margins": 2.678910732269287, + "rewards_train/rejected": -6.4902663230896, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -121.5532455444336, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -121.29031372070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.855324625968933, + "rewards_train/margins": -0.026293277740478516, + "rewards_train/rejected": -1.8290313482284546, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -118.90792846679688, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -139.52420043945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6407928466796875, + "rewards_train/margins": 2.311627149581909, + "rewards_train/rejected": -3.9524199962615967, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -168.8580322265625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -247.19219970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.63580322265625, + "rewards_train/margins": 3.1834168434143066, + "rewards_train/rejected": -7.819220066070557, + "step": 1890 + }, + { + "epoch": 0.53, + "logps_train/chosen": -4.969814777374268, + "logps_train/ref_chosen": -2.171875, + "logps_train/ref_rejected": -1.3984375, + "logps_train/rejected": -5.535637378692627, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27979397773742676, + "rewards_train/margins": 0.13392600417137146, + "rewards_train/rejected": -0.4137199819087982, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -24.292768478393555, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -61.83650588989258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3542768955230713, + "rewards_train/margins": 3.5418736934661865, + "rewards_train/rejected": -4.896150588989258, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -173.26846313476562, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -240.81295776367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.326846599578857, + "rewards_train/margins": 3.5544495582580566, + "rewards_train/rejected": -8.881296157836914, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -20.614294052124023, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -72.63430786132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0614293813705444, + "rewards_train/margins": 4.5270015001297, + "rewards_train/rejected": -5.588430881500244, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -118.85185241699219, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -249.12478637695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.985185384750366, + "rewards_train/margins": 3.2272932529449463, + "rewards_train/rejected": -6.2124786376953125, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -259.1002197265625, + "logps_train/ref_chosen": -204.0, + "logps_train/ref_rejected": -228.0, + "logps_train/rejected": -289.28424072265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.510022163391113, + "rewards_train/margins": 0.6184020042419434, + "rewards_train/rejected": -6.128424167633057, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -25.92336654663086, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -53.33208465576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8673366904258728, + "rewards_train/margins": 3.609622061252594, + "rewards_train/rejected": -4.476958751678467, + "step": 1891 + }, + { + "epoch": 0.53, + "logps_train/chosen": -11.36289119720459, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -28.317432403564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34253913164138794, + "rewards_train/margins": 0.9954541325569153, + "rewards_train/rejected": -1.3379932641983032, + "step": 1891 + }, + { + "epoch": 0.53, + "learning_rate": 3.0634669418719514e-07, + "loss": 0.2498, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -5.466352462768555, + "logps_train/ref_chosen": -1.4921875, + "logps_train/ref_rejected": -1.1328125, + "logps_train/rejected": -2.4361603260040283, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.39741650223731995, + "rewards_train/margins": -0.26708172261714935, + "rewards_train/rejected": -0.1303347796201706, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -155.17674255371094, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -204.845947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6176742911338806, + "rewards_train/margins": 5.466920435428619, + "rewards_train/rejected": -6.0845947265625, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -266.59197998046875, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -292.04217529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.859198570251465, + "rewards_train/margins": 2.24501895904541, + "rewards_train/rejected": -12.104217529296875, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -10.28003215789795, + "logps_train/ref_chosen": -0.875, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -20.64779281616211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9405032396316528, + "rewards_train/margins": 0.8164635896682739, + "rewards_train/rejected": -1.7569668292999268, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -70.31132507324219, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -62.323307037353516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.88113260269165, + "rewards_train/margins": -1.9988019466400146, + "rewards_train/rejected": -2.8823306560516357, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -83.89262390136719, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -105.87324523925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3392623960971832, + "rewards_train/margins": 0.14806213974952698, + "rewards_train/rejected": -0.4873245358467102, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -37.63417053222656, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -29.55147933959961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1352920532226562, + "rewards_train/margins": -0.7051441669464111, + "rewards_train/rejected": -2.430147886276245, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -4.284186363220215, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -3.234375, + "logps_train/rejected": -23.847761154174805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.037206362932920456, + "rewards_train/margins": 2.0985450260341167, + "rewards_train/rejected": -2.0613386631011963, + "step": 1892 + }, + { + "epoch": 0.53, + "logps_train/chosen": -44.92039489746094, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -42.69267272949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9795395135879517, + "rewards_train/margins": 1.2397278547286987, + "rewards_train/rejected": -3.2192673683166504, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -197.14804077148438, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -185.1666259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.014804363250732, + "rewards_train/margins": 4.351858615875244, + "rewards_train/rejected": -8.366662979125977, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -158.9552764892578, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -162.2845458984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.295527935028076, + "rewards_train/margins": -0.5170731544494629, + "rewards_train/rejected": -4.778454780578613, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -168.59547424316406, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -197.90625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5595474243164062, + "rewards_train/margins": 1.4310777187347412, + "rewards_train/rejected": -3.9906251430511475, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -106.05535888671875, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -95.19593811035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5555360317230225, + "rewards_train/margins": 1.8390576839447021, + "rewards_train/rejected": -4.394593715667725, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -34.78125762939453, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -15.173832893371582, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4281257390975952, + "rewards_train/margins": -0.47636741399765015, + "rewards_train/rejected": -0.9517583250999451, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -121.30586242675781, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -200.61300659179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6805862784385681, + "rewards_train/margins": 5.480714380741119, + "rewards_train/rejected": -6.1613006591796875, + "step": 1893 + }, + { + "epoch": 0.53, + "logps_train/chosen": -128.0974884033203, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -213.82867431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6097488403320312, + "rewards_train/margins": 7.3731184005737305, + "rewards_train/rejected": -9.982867240905762, + "step": 1893 + }, + { + "epoch": 0.53, + "learning_rate": 3.044435090576405e-07, + "loss": 0.4908, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -160.24508666992188, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -195.26583862304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.424508810043335, + "rewards_train/margins": 2.6020753383636475, + "rewards_train/rejected": -5.026584148406982, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -18.659435272216797, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -3.203125, + "logps_train/rejected": -40.29072570800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4159435033798218, + "rewards_train/margins": 2.2928165197372437, + "rewards_train/rejected": -3.7087600231170654, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -129.6731719970703, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -223.67063903808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.067317247390747, + "rewards_train/margins": 7.499747037887573, + "rewards_train/rejected": -9.56706428527832, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.462862014770508, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -14.858238220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7900362014770508, + "rewards_train/margins": 0.2645376920700073, + "rewards_train/rejected": -1.054573893547058, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -7.6058573722839355, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -11.009865760803223, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12308573722839355, + "rewards_train/margins": 0.07165084779262543, + "rewards_train/rejected": -0.19473658502101898, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.07960033416748, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -2.21875, + "logps_train/rejected": -18.618968963623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5673350691795349, + "rewards_train/margins": 1.0726868510246277, + "rewards_train/rejected": -1.6400219202041626, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -84.29214477539062, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -170.36740112304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.9207855463027954, + "rewards_train/margins": 6.657525658607483, + "rewards_train/rejected": -5.7367401123046875, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -25.914138793945312, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -34.19588851928711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0976638793945312, + "rewards_train/margins": 1.9094250202178955, + "rewards_train/rejected": -3.0070888996124268, + "step": 1894 + }, + { + "epoch": 0.53, + "logps_train/chosen": -198.82354736328125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -161.6913299560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.982354640960693, + "rewards_train/margins": -0.8632216453552246, + "rewards_train/rejected": -5.119132995605469, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -108.11042785644531, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -103.1007308959961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.461042881011963, + "rewards_train/margins": -0.9509698152542114, + "rewards_train/rejected": -1.5100730657577515, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -81.9378662109375, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -169.5003662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.643786609172821, + "rewards_train/margins": 6.306249916553497, + "rewards_train/rejected": -6.950036525726318, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -4.810328006744385, + "logps_train/ref_chosen": -1.0859375, + "logps_train/ref_rejected": -1.96875, + "logps_train/rejected": -15.219893455505371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37243905663490295, + "rewards_train/margins": 0.9526753127574921, + "rewards_train/rejected": -1.325114369392395, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -92.1450424194336, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -91.64117431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5645042657852173, + "rewards_train/margins": 0.09961318969726562, + "rewards_train/rejected": -1.664117455482483, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -15.56301498413086, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -12.175726890563965, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.871926486492157, + "rewards_train/margins": -0.28247880935668945, + "rewards_train/rejected": -0.5894476771354675, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -22.860010147094727, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -38.45985412597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2547510862350464, + "rewards_train/margins": 0.4162343740463257, + "rewards_train/rejected": -1.670985460281372, + "step": 1895 + }, + { + "epoch": 0.53, + "logps_train/chosen": -75.9490737915039, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -101.4378433227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0949074029922485, + "rewards_train/margins": 0.6988769769668579, + "rewards_train/rejected": -1.7937843799591064, + "step": 1895 + }, + { + "epoch": 0.53, + "learning_rate": 3.025451920860649e-07, + "loss": 0.4406, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -139.78396606445312, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -166.4674530029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.128396511077881, + "rewards_train/margins": 3.718348979949951, + "rewards_train/rejected": -7.846745491027832, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -213.73463439941406, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -218.87347412109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.723464012145996, + "rewards_train/margins": 0.5638837814331055, + "rewards_train/rejected": -10.287347793579102, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -6.496278762817383, + "logps_train/ref_chosen": -2.1875, + "logps_train/ref_rejected": -0.5859375, + "logps_train/rejected": -6.896096229553223, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4308778941631317, + "rewards_train/margins": 0.20013800263404846, + "rewards_train/rejected": -0.6310158967971802, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -127.65377044677734, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -216.70419311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0653769969940186, + "rewards_train/margins": 8.605042695999146, + "rewards_train/rejected": -10.670419692993164, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -96.07093811035156, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -175.99349975585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9570939540863037, + "rewards_train/margins": 4.792256116867065, + "rewards_train/rejected": -7.749350070953369, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -98.67167663574219, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -53.61114501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4828323423862457, + "rewards_train/margins": 4.793947130441666, + "rewards_train/rejected": -4.31111478805542, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -68.3780288696289, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -103.79925537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5378029346466064, + "rewards_train/margins": 3.4421226978302, + "rewards_train/rejected": -6.979925632476807, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -42.65631866455078, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -91.40060424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3406319618225098, + "rewards_train/margins": 4.399428367614746, + "rewards_train/rejected": -6.740060329437256, + "step": 1896 + }, + { + "epoch": 0.53, + "logps_train/chosen": -177.11355590820312, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -247.4661865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.911355495452881, + "rewards_train/margins": 3.335263729095459, + "rewards_train/rejected": -9.24661922454834, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -57.45478439331055, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -50.68977737426758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6954784393310547, + "rewards_train/margins": -0.30150070786476135, + "rewards_train/rejected": -0.39397773146629333, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -19.722858428955078, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -31.276809692382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3785358667373657, + "rewards_train/margins": 0.9522701501846313, + "rewards_train/rejected": -2.330806016921997, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -267.4262390136719, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -281.97528076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.642623901367188, + "rewards_train/margins": 2.454904556274414, + "rewards_train/rejected": -13.097528457641602, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -2.893643379211426, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -5.222365379333496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.008114337921142578, + "rewards_train/margins": 0.2078721970319748, + "rewards_train/rejected": -0.21598653495311737, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -55.26610565185547, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -29.903379440307617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.839110851287842, + "rewards_train/margins": -2.26127290725708, + "rewards_train/rejected": -2.5778379440307617, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -12.128762245178223, + "logps_train/ref_chosen": -1.6640625, + "logps_train/ref_rejected": -2.28125, + "logps_train/rejected": -49.786643981933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.046470046043396, + "rewards_train/margins": 3.7040692567825317, + "rewards_train/rejected": -4.750539302825928, + "step": 1897 + }, + { + "epoch": 0.53, + "logps_train/chosen": -224.01234436035156, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -276.1910705566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.601234436035156, + "rewards_train/margins": 6.417872428894043, + "rewards_train/rejected": -11.0191068649292, + "step": 1897 + }, + { + "epoch": 0.53, + "learning_rate": 3.006517565586749e-07, + "loss": 0.3383, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -271.12689208984375, + "logps_train/ref_chosen": -226.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -206.87396240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.512689113616943, + "rewards_train/margins": -0.12529277801513672, + "rewards_train/rejected": -4.387396335601807, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -211.48606872558594, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -178.48292541503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9486069679260254, + "rewards_train/margins": -2.2003144025802612, + "rewards_train/rejected": -0.7482925653457642, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -189.91357421875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -256.98443603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.391357421875, + "rewards_train/margins": 7.207086563110352, + "rewards_train/rejected": -13.598443984985352, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -132.0981903076172, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -154.57843017578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.609818935394287, + "rewards_train/margins": -1.0019760131835938, + "rewards_train/rejected": -4.607842922210693, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -22.766862869262695, + "logps_train/ref_chosen": -0.73828125, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -31.700307846069336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2028582096099854, + "rewards_train/margins": -0.23907744884490967, + "rewards_train/rejected": -1.9637807607650757, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -22.548309326171875, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -33.55226135253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4298309087753296, + "rewards_train/margins": 0.42539525032043457, + "rewards_train/rejected": -1.8552261590957642, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -184.28167724609375, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -308.95391845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.22816801071167, + "rewards_train/margins": 8.767223834991455, + "rewards_train/rejected": -13.995391845703125, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -4.327663898468018, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -6.689291000366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.135891392827034, + "rewards_train/margins": 0.029912710189819336, + "rewards_train/rejected": -0.16580410301685333, + "step": 1898 + }, + { + "epoch": 0.53, + "logps_train/chosen": -209.07608032226562, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -221.6749267578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.307608127593994, + "rewards_train/margins": -0.5401153564453125, + "rewards_train/rejected": -6.767492771148682, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -23.56076431274414, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -56.055023193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6185764670372009, + "rewards_train/margins": 3.5869260430336, + "rewards_train/rejected": -4.205502510070801, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -22.057659149169922, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -26.111679077148438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7620159387588501, + "rewards_train/margins": -0.08834803104400635, + "rewards_train/rejected": -0.6736679077148438, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -8.699559211730957, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -26.116439819335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2199559211730957, + "rewards_train/margins": 1.3604381084442139, + "rewards_train/rejected": -1.5803940296173096, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -11.628876686096191, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -21.17858123779297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.876950204372406, + "rewards_train/margins": -0.27159208059310913, + "rewards_train/rejected": -0.6053581237792969, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -106.70440673828125, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -88.29600524902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.420440673828125, + "rewards_train/margins": -1.9908401370048523, + "rewards_train/rejected": -0.4296005368232727, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -116.15521240234375, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -148.03448486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4655212461948395, + "rewards_train/margins": 2.637927383184433, + "rewards_train/rejected": -3.1034486293792725, + "step": 1899 + }, + { + "epoch": 0.53, + "logps_train/chosen": -55.60254669189453, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -47.25, + "logps_train/rejected": -90.7589111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.73525470495224, + "rewards_train/margins": 3.61563640832901, + "rewards_train/rejected": -4.35089111328125, + "step": 1899 + }, + { + "epoch": 0.53, + "learning_rate": 2.987632157275114e-07, + "loss": 0.714, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -11.300094604492188, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -12.563529014587402, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3050094544887543, + "rewards_train/margins": -0.3174065528437495, + "rewards_train/rejected": 0.01239709835499525, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -131.1715545654297, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -205.88711547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4171555042266846, + "rewards_train/margins": 3.871556043624878, + "rewards_train/rejected": -7.2887115478515625, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -46.06071853637695, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -68.55485534667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7810719013214111, + "rewards_train/margins": 2.199413776397705, + "rewards_train/rejected": -3.980485677719116, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -54.0770378112793, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -27.653099060058594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2077038288116455, + "rewards_train/margins": -0.14239394664764404, + "rewards_train/rejected": -1.0653098821640015, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -43.11921691894531, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -53.954620361328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2181718349456787, + "rewards_train/margins": -0.48520970344543457, + "rewards_train/rejected": -2.732962131500244, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -128.8939208984375, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -173.35476684570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9893921613693237, + "rewards_train/margins": 4.046084523200989, + "rewards_train/rejected": -6.0354766845703125, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -134.81222534179688, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -218.40530395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8312225341796875, + "rewards_train/margins": 2.6093077659606934, + "rewards_train/rejected": -6.440530300140381, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -137.85948181152344, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -150.01275634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.085948467254639, + "rewards_train/margins": 0.4153270721435547, + "rewards_train/rejected": -6.501275539398193, + "step": 1900 + }, + { + "epoch": 0.53, + "logps_train/chosen": -1.6519132852554321, + "logps_train/ref_chosen": -1.546875, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -1.6645207405090332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010503828525543213, + "rewards_train/margins": 0.0012607453390955925, + "rewards_train/rejected": -0.011764573864638805, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -110.19866943359375, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -150.69512939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.769866943359375, + "rewards_train/margins": 5.19964599609375, + "rewards_train/rejected": -6.969512939453125, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -155.2662353515625, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -123.49855041503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.026623725891113, + "rewards_train/margins": -1.076768398284912, + "rewards_train/rejected": -4.949855327606201, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -71.3611068725586, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -110.94434356689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9111106991767883, + "rewards_train/margins": 3.2333237528800964, + "rewards_train/rejected": -4.144434452056885, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -178.5584716796875, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -189.3404541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1558473110198975, + "rewards_train/margins": 0.37819814682006836, + "rewards_train/rejected": -3.534045457839966, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -211.86993408203125, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -336.3591613769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.586993217468262, + "rewards_train/margins": 7.4489240646362305, + "rewards_train/rejected": -18.035917282104492, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -165.50738525390625, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -208.97848510742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.500738620758057, + "rewards_train/margins": 5.247109889984131, + "rewards_train/rejected": -9.747848510742188, + "step": 1901 + }, + { + "epoch": 0.53, + "logps_train/chosen": -129.22265625, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -188.7371826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.722265720367432, + "rewards_train/margins": 4.001452922821045, + "rewards_train/rejected": -8.723718643188477, + "step": 1901 + }, + { + "epoch": 0.53, + "learning_rate": 2.9687958281035797e-07, + "loss": 0.3731, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.922263145446777, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -32.045265197753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.13902369141578674, + "rewards_train/margins": 2.893550306558609, + "rewards_train/rejected": -2.7545266151428223, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -104.43663024902344, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -85.98048400878906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7436630725860596, + "rewards_train/margins": -0.09561467170715332, + "rewards_train/rejected": -1.6480484008789062, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -186.02346801757812, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -200.9739990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.002346992492676, + "rewards_train/margins": 2.545053482055664, + "rewards_train/rejected": -8.54740047454834, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -12.377565383911133, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -49.5006103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04974346235394478, + "rewards_train/margins": 4.571679402142763, + "rewards_train/rejected": -4.521935939788818, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -40.052162170410156, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -49.183509826660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8989663124084473, + "rewards_train/margins": 0.20688462257385254, + "rewards_train/rejected": -3.1058509349823, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -214.62847900390625, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -153.62322998046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.162847995758057, + "rewards_train/margins": -1.7505249977111816, + "rewards_train/rejected": -3.412322998046875, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -74.58065795898438, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -177.49957275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5580658316612244, + "rewards_train/margins": 6.841891348361969, + "rewards_train/rejected": -7.399957180023193, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -27.049591064453125, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -26.14541244506836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3487091064453125, + "rewards_train/margins": -1.1341677904129028, + "rewards_train/rejected": -1.2145413160324097, + "step": 1902 + }, + { + "epoch": 0.53, + "logps_train/chosen": -28.560009002685547, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -47.19013214111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5372508764266968, + "rewards_train/margins": 2.5067626237869263, + "rewards_train/rejected": -4.044013500213623, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -59.462615966796875, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -206.00631713867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8962615728378296, + "rewards_train/margins": 4.004370331764221, + "rewards_train/rejected": -5.900631904602051, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -39.98273468017578, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -71.11170196533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.785773515701294, + "rewards_train/margins": 1.8503966331481934, + "rewards_train/rejected": -3.6361701488494873, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -177.9483184814453, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -174.432861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0948318243026733, + "rewards_train/margins": 1.3484543561935425, + "rewards_train/rejected": -2.443286180496216, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -183.49636840820312, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -205.23651123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.949636936187744, + "rewards_train/margins": 2.074014186859131, + "rewards_train/rejected": -7.023651123046875, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -28.45343017578125, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -28.18951988220215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5484681129455566, + "rewards_train/margins": -1.6545161008834839, + "rewards_train/rejected": -0.8939520120620728, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -120.84691619873047, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -137.40859985351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.234691619873047, + "rewards_train/margins": 1.2561683654785156, + "rewards_train/rejected": -4.4908599853515625, + "step": 1903 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.1940279006958, + "logps_train/ref_chosen": -1.125, + "logps_train/ref_rejected": -1.8046875, + "logps_train/rejected": -6.761405944824219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2069028615951538, + "rewards_train/margins": -0.7112310230731964, + "rewards_train/rejected": -0.4956718385219574, + "step": 1903 + }, + { + "epoch": 0.53, + "learning_rate": 2.950008709906475e-07, + "loss": 0.5363, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -22.943737030029297, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -72.63703918457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1068737506866455, + "rewards_train/margins": 1.7818303108215332, + "rewards_train/rejected": -2.8887040615081787, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -30.526851654052734, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -49.02564239501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8339351415634155, + "rewards_train/margins": 0.4311290979385376, + "rewards_train/rejected": -2.265064239501953, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -55.01621627807617, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -51.334434509277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.876621663570404, + "rewards_train/margins": 1.7193217873573303, + "rewards_train/rejected": -2.5959434509277344, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -50.0595817565918, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -48.51239776611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7309581637382507, + "rewards_train/margins": 3.220281660556793, + "rewards_train/rejected": -3.951239824295044, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -33.04384994506836, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -28.75, + "logps_train/rejected": -53.18329620361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.404384970664978, + "rewards_train/margins": 1.0389446020126343, + "rewards_train/rejected": -2.4433295726776123, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -28.33562469482422, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -56.10578536987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0960625410079956, + "rewards_train/margins": 1.5395160913467407, + "rewards_train/rejected": -2.6355786323547363, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -1.7899162769317627, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -0.86328125, + "logps_train/rejected": -42.27138137817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02274162881076336, + "rewards_train/margins": 4.1180683840066195, + "rewards_train/rejected": -4.140810012817383, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -8.560430526733398, + "logps_train/ref_chosen": -2.125, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -28.487564086914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6435430645942688, + "rewards_train/margins": 1.4333383440971375, + "rewards_train/rejected": -2.0768814086914062, + "step": 1904 + }, + { + "epoch": 0.53, + "logps_train/chosen": -11.995067596435547, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -22.82207679748535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0456005334854126, + "rewards_train/margins": 0.9975446462631226, + "rewards_train/rejected": -2.043145179748535, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -22.875431060791016, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -31.393314361572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2187931537628174, + "rewards_train/margins": 1.1080384254455566, + "rewards_train/rejected": -2.326831579208374, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -59.268959045410156, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -161.4142608642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3018959164619446, + "rewards_train/margins": 6.189530074596405, + "rewards_train/rejected": -6.49142599105835, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -28.04607582092285, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -25.995162963867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4546077251434326, + "rewards_train/margins": -0.8550914525985718, + "rewards_train/rejected": -1.5995162725448608, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -55.83633804321289, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -33.135406494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4086337983608246, + "rewards_train/margins": 1.51115682721138, + "rewards_train/rejected": -1.9197906255722046, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -107.41719055175781, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -129.25918579101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1917190551757812, + "rewards_train/margins": 1.184199571609497, + "rewards_train/rejected": -3.3759186267852783, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -12.542829513549805, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -25.536540985107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37303295731544495, + "rewards_train/margins": 1.6024961173534393, + "rewards_train/rejected": -1.9755290746688843, + "step": 1905 + }, + { + "epoch": 0.53, + "logps_train/chosen": -148.33558654785156, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -141.16253662109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.7835588455200195, + "rewards_train/margins": -0.3173050880432129, + "rewards_train/rejected": -5.466253757476807, + "step": 1905 + }, + { + "epoch": 0.53, + "learning_rate": 2.9312709341737154e-07, + "loss": 0.3071, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -16.53925895690918, + "logps_train/ref_chosen": -3.359375, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -29.903955459594727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.317988395690918, + "rewards_train/margins": 1.1130321025848389, + "rewards_train/rejected": -2.431020498275757, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -29.363040924072266, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -34.26478958129883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43630409240722656, + "rewards_train/margins": 2.0776748657226562, + "rewards_train/rejected": -2.513978958129883, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -14.50719165802002, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -14.770998001098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39446917176246643, + "rewards_train/margins": 0.4451306164264679, + "rewards_train/rejected": -0.8395997881889343, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -46.377403259277344, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -56.62526321411133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.32524037361145, + "rewards_train/margins": 1.956035852432251, + "rewards_train/rejected": -4.281276226043701, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -31.238941192626953, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -50.457706451416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4988940954208374, + "rewards_train/margins": 0.7968765497207642, + "rewards_train/rejected": -2.2957706451416016, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -158.86561584472656, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -230.232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6865615844726562, + "rewards_train/margins": 5.036680698394775, + "rewards_train/rejected": -7.723242282867432, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -223.79244995117188, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -241.88356018066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.779244899749756, + "rewards_train/margins": 1.209111213684082, + "rewards_train/rejected": -6.988356113433838, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -23.230186462402344, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -17.18408966064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.398018717765808, + "rewards_train/margins": -0.06867218017578125, + "rewards_train/rejected": -1.3293465375900269, + "step": 1906 + }, + { + "epoch": 0.53, + "logps_train/chosen": -198.32717895507812, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -212.96702575683594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.732718467712402, + "rewards_train/margins": -0.08601570129394531, + "rewards_train/rejected": -8.646702766418457, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -18.194732666015625, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -68.23482513427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5944733023643494, + "rewards_train/margins": 1.479009211063385, + "rewards_train/rejected": -2.0734825134277344, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -154.48362731933594, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -197.48338317871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.298362731933594, + "rewards_train/margins": 4.99997615814209, + "rewards_train/rejected": -9.298338890075684, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -10.782350540161133, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -6.098457336425781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5594850778579712, + "rewards_train/margins": -0.2980768382549286, + "rewards_train/rejected": -0.2614082396030426, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -297.34564208984375, + "logps_train/ref_chosen": -207.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -234.6147918701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.034564018249512, + "rewards_train/margins": -0.573084831237793, + "rewards_train/rejected": -8.461479187011719, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -96.11266326904297, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -119.77647399902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7612663507461548, + "rewards_train/margins": 4.441381335258484, + "rewards_train/rejected": -6.202647686004639, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -15.47765064239502, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -23.053340911865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7852650880813599, + "rewards_train/margins": 0.20131903886795044, + "rewards_train/rejected": -0.9865841269493103, + "step": 1907 + }, + { + "epoch": 0.53, + "logps_train/chosen": -30.721939086914062, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -22.300640106201172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7034438848495483, + "rewards_train/margins": -0.34837985038757324, + "rewards_train/rejected": -1.355064034461975, + "step": 1907 + }, + { + "epoch": 0.53, + "learning_rate": 2.912582632049857e-07, + "loss": 0.4194, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -4.190535068511963, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -56.41234588623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05282149463891983, + "rewards_train/margins": 5.13780627399683, + "rewards_train/rejected": -5.08498477935791, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -73.10211944580078, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -111.33808898925781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6102120876312256, + "rewards_train/margins": -0.47640323638916016, + "rewards_train/rejected": -3.1338088512420654, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -49.475616455078125, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -74.70428466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8475615978240967, + "rewards_train/margins": 1.8103668689727783, + "rewards_train/rejected": -5.657928466796875, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -160.24169921875, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -184.4276123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.524169921875, + "rewards_train/margins": 3.2185912132263184, + "rewards_train/rejected": -6.742761135101318, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -31.18204689025879, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -34.91685104370117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.78070467710495, + "rewards_train/margins": 1.060980498790741, + "rewards_train/rejected": -1.841685175895691, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -25.646759033203125, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -41.24602127075195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5646759271621704, + "rewards_train/margins": 1.3099262714385986, + "rewards_train/rejected": -1.874602198600769, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -37.350006103515625, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -65.4047622680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.86625075340271, + "rewards_train/margins": 0.4242255687713623, + "rewards_train/rejected": -3.2904763221740723, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -4.6573076248168945, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -40.402557373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16885577142238617, + "rewards_train/margins": 1.5589000135660172, + "rewards_train/rejected": -1.7277557849884033, + "step": 1908 + }, + { + "epoch": 0.53, + "logps_train/chosen": -140.41798400878906, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -157.38259887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.441798448562622, + "rewards_train/margins": 4.146461725234985, + "rewards_train/rejected": -6.588260173797607, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -28.687650680541992, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -48.5, + "logps_train/rejected": -67.27780151367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.068765163421631, + "rewards_train/margins": -0.19098496437072754, + "rewards_train/rejected": -1.8777801990509033, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -115.44482421875, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -120.78536987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.244482517242432, + "rewards_train/margins": 0.08405447006225586, + "rewards_train/rejected": -4.3285369873046875, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -169.83360290527344, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -213.19247436523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.033360481262207, + "rewards_train/margins": 3.0858869552612305, + "rewards_train/rejected": -9.119247436523438, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -14.553779602050781, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -13.125137329101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0866279602050781, + "rewards_train/margins": -0.3772392272949219, + "rewards_train/rejected": -0.7093887329101562, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -125.46626281738281, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -171.53346252441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.146626353263855, + "rewards_train/margins": 4.456719994544983, + "rewards_train/rejected": -5.603346347808838, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -40.427974700927734, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -33.27493667602539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8177974820137024, + "rewards_train/margins": 0.9534462094306946, + "rewards_train/rejected": -1.771243691444397, + "step": 1909 + }, + { + "epoch": 0.53, + "logps_train/chosen": -33.67876434326172, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -26.768142700195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1428765058517456, + "rewards_train/margins": 0.9745627641677856, + "rewards_train/rejected": -2.1174392700195312, + "step": 1909 + }, + { + "epoch": 0.53, + "learning_rate": 2.8939439343332085e-07, + "loss": 0.3403, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -159.17633056640625, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -143.93545532226562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.617633104324341, + "rewards_train/margins": -0.32408761978149414, + "rewards_train/rejected": -2.2935454845428467, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -30.081802368164062, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -66.49223327636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8394302129745483, + "rewards_train/margins": 1.709793210029602, + "rewards_train/rejected": -3.5492234230041504, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -62.52370834350586, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -124.01901245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2273708581924438, + "rewards_train/margins": 3.8245304822921753, + "rewards_train/rejected": -5.051901340484619, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -196.0385284423828, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -120.39017486572266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.403852939605713, + "rewards_train/margins": -4.014835357666016, + "rewards_train/rejected": -2.3890175819396973, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -96.45845794677734, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -186.96218872070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5958458185195923, + "rewards_train/margins": 5.850373148918152, + "rewards_train/rejected": -7.446218967437744, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -19.931718826293945, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -47.01220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5525468587875366, + "rewards_train/margins": 2.4830490350723267, + "rewards_train/rejected": -4.035595893859863, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -233.39105224609375, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -255.77508544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.639105796813965, + "rewards_train/margins": 3.1884031295776367, + "rewards_train/rejected": -12.827508926391602, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -195.4866943359375, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -169.3021697998047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.548669338226318, + "rewards_train/margins": -0.918452262878418, + "rewards_train/rejected": -4.6302170753479, + "step": 1910 + }, + { + "epoch": 0.53, + "logps_train/chosen": -98.6291275024414, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -111.31688690185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.562912702560425, + "rewards_train/margins": 0.1687760353088379, + "rewards_train/rejected": -2.7316887378692627, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -30.212432861328125, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -45.16777801513672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4524933099746704, + "rewards_train/margins": 1.4392844438552856, + "rewards_train/rejected": -2.891777753829956, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -151.9071044921875, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -193.35595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3907105922698975, + "rewards_train/margins": 5.044884920120239, + "rewards_train/rejected": -8.435595512390137, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -75.13194274902344, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -87.91226196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31319427490234375, + "rewards_train/margins": 0.27803194522857666, + "rewards_train/rejected": -0.5912262201309204, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -260.2835388183594, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -304.2263488769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.328353881835938, + "rewards_train/margins": 3.294281005859375, + "rewards_train/rejected": -12.622634887695312, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -159.37847900390625, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -188.37123107910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.187847852706909, + "rewards_train/margins": 4.549275159835815, + "rewards_train/rejected": -7.737123012542725, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -54.6164665222168, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -58.56523513793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.974146604537964, + "rewards_train/margins": 1.001127004623413, + "rewards_train/rejected": -4.975273609161377, + "step": 1911 + }, + { + "epoch": 0.53, + "logps_train/chosen": -125.8703842163086, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -190.76614379882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7370383739471436, + "rewards_train/margins": 5.4395763874053955, + "rewards_train/rejected": -9.176614761352539, + "step": 1911 + }, + { + "epoch": 0.53, + "learning_rate": 2.8753549714748846e-07, + "loss": 0.5141, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -104.94889068603516, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -147.5687255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6448891162872314, + "rewards_train/margins": 3.511983633041382, + "rewards_train/rejected": -6.156872749328613, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -195.16387939453125, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -241.00997924804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.916388034820557, + "rewards_train/margins": 4.284610271453857, + "rewards_train/rejected": -11.200998306274414, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -120.22065734863281, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -201.1258544921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7220656871795654, + "rewards_train/margins": 2.1905200481414795, + "rewards_train/rejected": -5.912585735321045, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -20.5942325592041, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -33.265960693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4250482320785522, + "rewards_train/margins": 0.8327978849411011, + "rewards_train/rejected": -2.2578461170196533, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -26.28101348876953, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -1.328125, + "logps_train/rejected": -23.296844482421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9531013369560242, + "rewards_train/margins": 1.2437706589698792, + "rewards_train/rejected": -2.1968719959259033, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -8.357985496520996, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -30.77858543395996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10767354816198349, + "rewards_train/margins": 1.3076849952340126, + "rewards_train/rejected": -1.415358543395996, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -282.35760498046875, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -274.3695373535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.435760498046875, + "rewards_train/margins": 1.7011938095092773, + "rewards_train/rejected": -13.136954307556152, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -191.33615112304688, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -224.7797393798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.0336151123046875, + "rewards_train/margins": -0.5556411743164062, + "rewards_train/rejected": -6.477973937988281, + "step": 1912 + }, + { + "epoch": 0.53, + "logps_train/chosen": -141.61001586914062, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -195.5140380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.911001682281494, + "rewards_train/margins": 3.590402126312256, + "rewards_train/rejected": -7.50140380859375, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -229.67288208007812, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -247.8109130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.517288208007812, + "rewards_train/margins": 0.9638032913208008, + "rewards_train/rejected": -12.481091499328613, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -219.2706756591797, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -268.77227783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.477067947387695, + "rewards_train/margins": 1.200160026550293, + "rewards_train/rejected": -10.677227973937988, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -120.095703125, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -155.57420349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9595704078674316, + "rewards_train/margins": 2.247849941253662, + "rewards_train/rejected": -5.207420349121094, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -10.8555908203125, + "logps_train/ref_chosen": -0.466796875, + "logps_train/ref_rejected": -0.466796875, + "logps_train/rejected": -10.597336769104004, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.03887939453125, + "rewards_train/margins": -0.0258253812789917, + "rewards_train/rejected": -1.0130540132522583, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -106.004150390625, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -48.5, + "logps_train/rejected": -95.44489288330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.350414991378784, + "rewards_train/margins": 2.3440744876861572, + "rewards_train/rejected": -4.694489479064941, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -27.10364532470703, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -24.05535125732422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4728646278381348, + "rewards_train/margins": -0.3892045021057129, + "rewards_train/rejected": -2.083660125732422, + "step": 1913 + }, + { + "epoch": 0.53, + "logps_train/chosen": -24.348508834838867, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -23.39414405822754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7786009311676025, + "rewards_train/margins": -0.27668654918670654, + "rewards_train/rejected": -1.501914381980896, + "step": 1913 + }, + { + "epoch": 0.53, + "learning_rate": 2.8568158735779237e-07, + "loss": 0.3399, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -5.44961404800415, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -0.51953125, + "logps_train/rejected": -8.03055191040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36293014883995056, + "rewards_train/margins": 0.3881719410419464, + "rewards_train/rejected": -0.751102089881897, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -163.07290649414062, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -59.71392059326172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.257290840148926, + "rewards_train/margins": -3.51089870929718, + "rewards_train/rejected": -1.7463921308517456, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -129.63580322265625, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -163.84619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6135803461074829, + "rewards_train/margins": 2.1710387468338013, + "rewards_train/rejected": -2.784619092941284, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -10.58297061920166, + "logps_train/ref_chosen": -2.046875, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -10.708207130432129, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.853609561920166, + "rewards_train/margins": -0.19528883695602417, + "rewards_train/rejected": -0.6583207249641418, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -158.310546875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -217.78756713867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2310547828674316, + "rewards_train/margins": 7.097702503204346, + "rewards_train/rejected": -10.328757286071777, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -31.441268920898438, + "logps_train/ref_chosen": -7.65625, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -34.539398193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3785018920898438, + "rewards_train/margins": 0.5629379749298096, + "rewards_train/rejected": -2.9414398670196533, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -18.756790161132812, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -23.718700408935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8756790161132812, + "rewards_train/margins": 0.6649410724639893, + "rewards_train/rejected": -1.5406200885772705, + "step": 1914 + }, + { + "epoch": 0.53, + "logps_train/chosen": -13.52791976928711, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -19.345170974731445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7965419888496399, + "rewards_train/margins": 0.6942251324653625, + "rewards_train/rejected": -1.4907671213150024, + "step": 1914 + }, + { + "epoch": 0.54, + "logps_train/chosen": -179.47238159179688, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -202.24844360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.897238254547119, + "rewards_train/margins": 4.927606105804443, + "rewards_train/rejected": -10.824844360351562, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -167.1746826171875, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -192.2279510498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.71746826171875, + "rewards_train/margins": 1.505326747894287, + "rewards_train/rejected": -4.222795009613037, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -18.177303314208984, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -37.09880447387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44898033142089844, + "rewards_train/margins": 1.3859001398086548, + "rewards_train/rejected": -1.8348804712295532, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -109.53913879394531, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -196.85482788085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.903913974761963, + "rewards_train/margins": 5.281569004058838, + "rewards_train/rejected": -9.1854829788208, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -30.288970947265625, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -66.94882202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8413971066474915, + "rewards_train/margins": 3.990985095500946, + "rewards_train/rejected": -4.8323822021484375, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -159.35140991210938, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -220.2652587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.985141277313232, + "rewards_train/margins": 1.1413846015930176, + "rewards_train/rejected": -8.12652587890625, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -12.255006790161133, + "logps_train/ref_chosen": -1.1171875, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -20.570152282714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1137819290161133, + "rewards_train/margins": 0.6994832754135132, + "rewards_train/rejected": -1.8132652044296265, + "step": 1915 + }, + { + "epoch": 0.54, + "logps_train/chosen": -156.58880615234375, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -174.12745666503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5588806867599487, + "rewards_train/margins": 3.3538652658462524, + "rewards_train/rejected": -4.912745952606201, + "step": 1915 + }, + { + "epoch": 0.54, + "learning_rate": 2.838326770396352e-07, + "loss": 0.4642, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -267.6117858886719, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -259.32366943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.061179161071777, + "rewards_train/margins": 0.27118778228759766, + "rewards_train/rejected": -8.332366943359375, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -142.44500732421875, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -193.99053955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9445008039474487, + "rewards_train/margins": 5.254553437232971, + "rewards_train/rejected": -7.19905424118042, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -160.8281707763672, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -159.21249389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.382817268371582, + "rewards_train/margins": 0.4384322166442871, + "rewards_train/rejected": -4.821249485015869, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -115.04721069335938, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -132.182373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2047210931777954, + "rewards_train/margins": 3.413516402244568, + "rewards_train/rejected": -4.618237495422363, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -100.28042602539062, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -94.25732421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7780426740646362, + "rewards_train/margins": -0.7523102760314941, + "rewards_train/rejected": -1.025732398033142, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -22.177955627441406, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -55.603538513183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2677955627441406, + "rewards_train/margins": 2.8425583839416504, + "rewards_train/rejected": -3.110353946685791, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -49.134674072265625, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -69.03226470947266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5134674310684204, + "rewards_train/margins": 2.314759135246277, + "rewards_train/rejected": -2.8282265663146973, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -113.76107788085938, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -241.63958740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.976108074188232, + "rewards_train/margins": 6.287850856781006, + "rewards_train/rejected": -11.263958930969238, + "step": 1916 + }, + { + "epoch": 0.54, + "logps_train/chosen": -137.84312438964844, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -192.73167419433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.684312582015991, + "rewards_train/margins": 2.9888551235198975, + "rewards_train/rejected": -6.673167705535889, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -119.10606384277344, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -173.30723571777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9106063842773438, + "rewards_train/margins": 2.820117473602295, + "rewards_train/rejected": -4.730723857879639, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.966859817504883, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -7.0110955238342285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5404359698295593, + "rewards_train/margins": -0.23620140552520752, + "rewards_train/rejected": -0.3042345643043518, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -6.498814582824707, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -25.722923278808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2905064523220062, + "rewards_train/margins": 1.6817858517169952, + "rewards_train/rejected": -1.9722923040390015, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -139.8509063720703, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -121.99300384521484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.085090637207031, + "rewards_train/margins": -1.0857901573181152, + "rewards_train/rejected": -2.999300479888916, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -130.4277801513672, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -99.58598327636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.842777967453003, + "rewards_train/margins": 0.36582040786743164, + "rewards_train/rejected": -3.2085983753204346, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -23.684127807617188, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -19.24626922607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.205912783741951, + "rewards_train/margins": 0.42496417462825775, + "rewards_train/rejected": -0.6308769583702087, + "step": 1917 + }, + { + "epoch": 0.54, + "logps_train/chosen": -150.9371795654297, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -153.2032928466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5937180519104004, + "rewards_train/margins": 1.7266111373901367, + "rewards_train/rejected": -4.320329189300537, + "step": 1917 + }, + { + "epoch": 0.54, + "learning_rate": 2.8198877913342867e-07, + "loss": 0.3786, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -29.821414947509766, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -57.33905029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3696415424346924, + "rewards_train/margins": 1.88926362991333, + "rewards_train/rejected": -3.2589051723480225, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -205.27297973632812, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -237.82318115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3272979259490967, + "rewards_train/margins": 1.35502028465271, + "rewards_train/rejected": -4.682318210601807, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -128.77923583984375, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -147.48365783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.377923488616943, + "rewards_train/margins": 2.770442485809326, + "rewards_train/rejected": -7.1483659744262695, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -5.886242866516113, + "logps_train/ref_chosen": -0.66015625, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -16.195640563964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5226086974143982, + "rewards_train/margins": 0.8578929305076599, + "rewards_train/rejected": -1.380501627922058, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -103.04039001464844, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -176.0870361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9540390372276306, + "rewards_train/margins": 4.154664576053619, + "rewards_train/rejected": -5.10870361328125, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -184.74908447265625, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -168.8705596923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.674908459186554, + "rewards_train/margins": 1.9121474623680115, + "rewards_train/rejected": -2.5870559215545654, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -14.158214569091797, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -31.25571060180664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027928544208407402, + "rewards_train/margins": 1.2409996520727873, + "rewards_train/rejected": -1.2130711078643799, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -12.29957389831543, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -27.163137435913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9596449136734009, + "rewards_train/margins": 0.09416890144348145, + "rewards_train/rejected": -1.0538138151168823, + "step": 1918 + }, + { + "epoch": 0.54, + "logps_train/chosen": -51.73992156982422, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -53.37751007080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5239921808242798, + "rewards_train/margins": 2.95750892162323, + "rewards_train/rejected": -4.48150110244751, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -15.788991928100586, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -1.2109375, + "logps_train/rejected": -15.697863578796387, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4578055143356323, + "rewards_train/margins": -0.009112834930419922, + "rewards_train/rejected": -1.4486926794052124, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -84.8229751586914, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -280.1128845214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9322975873947144, + "rewards_train/margins": 13.078991055488586, + "rewards_train/rejected": -15.0112886428833, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -243.41200256347656, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -185.62197875976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.841200828552246, + "rewards_train/margins": -2.1790027618408203, + "rewards_train/rejected": -8.662198066711426, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -37.11210250854492, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -80.78360748291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4362102448940277, + "rewards_train/margins": 3.442150503396988, + "rewards_train/rejected": -3.8783607482910156, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -9.904094696044922, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -30.5152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5595905184745789, + "rewards_train/margins": 2.7111164927482605, + "rewards_train/rejected": -2.1515259742736816, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -30.12798500061035, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -102.38054656982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1752985715866089, + "rewards_train/margins": 4.562756180763245, + "rewards_train/rejected": -5.7380547523498535, + "step": 1919 + }, + { + "epoch": 0.54, + "logps_train/chosen": -146.62533569335938, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -118.92645263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.512533664703369, + "rewards_train/margins": -0.46988821029663086, + "rewards_train/rejected": -4.042645454406738, + "step": 1919 + }, + { + "epoch": 0.54, + "learning_rate": 2.801499065445032e-07, + "loss": 0.371, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -19.383014678955078, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -29.34392738342285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7195515036582947, + "rewards_train/margins": 0.7148413062095642, + "rewards_train/rejected": -1.4343928098678589, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -94.1065902709961, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -66.60888671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.860659122467041, + "rewards_train/margins": -0.324770450592041, + "rewards_train/rejected": -2.535888671875, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -178.0642852783203, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -261.0297546386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.906428575515747, + "rewards_train/margins": 6.196547269821167, + "rewards_train/rejected": -9.102975845336914, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -89.2967529296875, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -35.61219024658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.679675281047821, + "rewards_train/margins": 1.9752936959266663, + "rewards_train/rejected": -2.6549689769744873, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -42.89179992675781, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -50.475860595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.326680064201355, + "rewards_train/margins": 1.4834059476852417, + "rewards_train/rejected": -2.8100860118865967, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -24.836124420166016, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -30.595664978027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0804874897003174, + "rewards_train/margins": -0.05217099189758301, + "rewards_train/rejected": -2.0283164978027344, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -17.671051025390625, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -43.4039192199707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2421051263809204, + "rewards_train/margins": 2.1857868432998657, + "rewards_train/rejected": -3.427891969680786, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -176.05374145507812, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -201.0250244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.505374431610107, + "rewards_train/margins": 1.097127914428711, + "rewards_train/rejected": -5.602502346038818, + "step": 1920 + }, + { + "epoch": 0.54, + "logps_train/chosen": -0.05776449292898178, + "logps_train/ref_chosen": -0.6328125, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -21.14589500427246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.057504802942276, + "rewards_train/margins": 1.4533443748950958, + "rewards_train/rejected": -1.3958395719528198, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -23.00455665588379, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -70.87802124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0192058086395264, + "rewards_train/margins": 2.681096315383911, + "rewards_train/rejected": -4.7003021240234375, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -8.140105247497559, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -23.96714210510254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2672394812107086, + "rewards_train/margins": 1.8264537155628204, + "rewards_train/rejected": -1.5592142343521118, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.876558303833008, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -1.4140625, + "logps_train/rejected": -1.0752875804901123, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5064058303833008, + "rewards_train/margins": -0.5402833223342896, + "rewards_train/rejected": 0.03387749195098877, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -93.57575988769531, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -93.5519790649414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.457576036453247, + "rewards_train/margins": -0.002378106117248535, + "rewards_train/rejected": -1.4551979303359985, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -18.58951187133789, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -38.13513946533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2402012348175049, + "rewards_train/margins": 1.417062759399414, + "rewards_train/rejected": -2.657263994216919, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -98.85076141357422, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -60.059722900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4350762367248535, + "rewards_train/margins": 0.3083961009979248, + "rewards_train/rejected": -3.7434723377227783, + "step": 1921 + }, + { + "epoch": 0.54, + "logps_train/chosen": -226.9800567626953, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -272.1867370605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.698005676269531, + "rewards_train/margins": 1.920668125152588, + "rewards_train/rejected": -7.618673801422119, + "step": 1921 + }, + { + "epoch": 0.54, + "learning_rate": 2.7831607214301787e-07, + "loss": 0.3584, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -17.81171417236328, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -41.25084686279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8186714053153992, + "rewards_train/margins": 1.3689132332801819, + "rewards_train/rejected": -2.187584638595581, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -66.71603393554688, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -42.11420822143555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6466034650802612, + "rewards_train/margins": -0.010182619094848633, + "rewards_train/rejected": -1.6364208459854126, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -11.413368225097656, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -25.607097625732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6288368105888367, + "rewards_train/margins": 1.3693730235099792, + "rewards_train/rejected": -1.998209834098816, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -29.137407302856445, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -64.70140075683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.549678325653076, + "rewards_train/margins": 1.3204617500305176, + "rewards_train/rejected": -3.8701400756835938, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -11.219780921936035, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -6.705812454223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.159478098154068, + "rewards_train/margins": 0.10485315322875977, + "rewards_train/rejected": -0.26433125138282776, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -1.944475769996643, + "logps_train/ref_chosen": -0.89453125, + "logps_train/ref_rejected": -4.0, + "logps_train/rejected": -6.398540019989014, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10499445348978043, + "rewards_train/margins": 0.13485955446958542, + "rewards_train/rejected": -0.23985400795936584, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -170.89968872070312, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -159.6248779296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.289968967437744, + "rewards_train/margins": 0.5225191116333008, + "rewards_train/rejected": -5.812488079071045, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -24.300111770629883, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -5.9824700355529785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.49251118302345276, + "rewards_train/margins": -0.7880141735076904, + "rewards_train/rejected": 0.29550299048423767, + "step": 1922 + }, + { + "epoch": 0.54, + "logps_train/chosen": -304.9376525878906, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -272.764892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -15.693765640258789, + "rewards_train/margins": 1.9827232360839844, + "rewards_train/rejected": -17.676488876342773, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -8.523876190185547, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -19.69386863708496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1586376279592514, + "rewards_train/margins": 0.8607492595911026, + "rewards_train/rejected": -1.019386887550354, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -18.173255920410156, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -44.28086471557617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5142005681991577, + "rewards_train/margins": 1.9201358556747437, + "rewards_train/rejected": -3.4343364238739014, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -23.404800415039062, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -54.847373962402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.00298011302948, + "rewards_train/margins": 3.225507378578186, + "rewards_train/rejected": -4.228487491607666, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -114.22398376464844, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -122.34442901611328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.572398662567139, + "rewards_train/margins": -0.23795557022094727, + "rewards_train/rejected": -4.334443092346191, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -69.64491271972656, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -151.57867431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8144912719726562, + "rewards_train/margins": 4.593376159667969, + "rewards_train/rejected": -5.407867431640625, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -230.48544311523438, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -208.637939453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.248544216156006, + "rewards_train/margins": -0.9847502708435059, + "rewards_train/rejected": -5.2637939453125, + "step": 1923 + }, + { + "epoch": 0.54, + "logps_train/chosen": -16.059734344482422, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -35.5196418762207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10597343742847443, + "rewards_train/margins": 3.18192820250988, + "rewards_train/rejected": -3.2879016399383545, + "step": 1923 + }, + { + "epoch": 0.54, + "learning_rate": 2.764872887638686e-07, + "loss": 0.4448, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -76.26673889160156, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -59.0, + "logps_train/rejected": -72.48664855957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.60167396068573, + "rewards_train/margins": -0.2530090808868408, + "rewards_train/rejected": -1.3486648797988892, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -128.94937133789062, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -175.18614196777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19493713974952698, + "rewards_train/margins": 4.673677057027817, + "rewards_train/rejected": -4.868614196777344, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -38.415409088134766, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -53.99313735961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2665408849716187, + "rewards_train/margins": 3.326522946357727, + "rewards_train/rejected": -4.593063831329346, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -65.90647888183594, + "logps_train/ref_chosen": -48.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -131.62054443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7406479120254517, + "rewards_train/margins": 3.9714068174362183, + "rewards_train/rejected": -5.71205472946167, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -82.58612823486328, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -123.84355163574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.183612823486328, + "rewards_train/margins": 2.400742530822754, + "rewards_train/rejected": -4.584355354309082, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -13.46124267578125, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -57.48003005981445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.796124279499054, + "rewards_train/margins": 1.8143786787986755, + "rewards_train/rejected": -2.6105029582977295, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -79.670654296875, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -125.74293518066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.567065417766571, + "rewards_train/margins": 4.257228195667267, + "rewards_train/rejected": -4.824293613433838, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.958842277526855, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -38.56248092651367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47713422775268555, + "rewards_train/margins": 1.0541138648986816, + "rewards_train/rejected": -1.5312480926513672, + "step": 1924 + }, + { + "epoch": 0.54, + "logps_train/chosen": -18.81378936767578, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -60.44448471069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7501289248466492, + "rewards_train/margins": 1.5193195939064026, + "rewards_train/rejected": -2.2694485187530518, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -122.17832946777344, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -177.474853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6678329706192017, + "rewards_train/margins": 1.6796523332595825, + "rewards_train/rejected": -2.347485303878784, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.707464218139648, + "logps_train/ref_chosen": -4.0, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -17.362655639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6707464456558228, + "rewards_train/margins": 0.20926910638809204, + "rewards_train/rejected": -0.8800155520439148, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -28.096656799316406, + "logps_train/ref_chosen": -0.8125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -31.599281311035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7284157276153564, + "rewards_train/margins": 0.003387451171875, + "rewards_train/rejected": -2.7318031787872314, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -63.507568359375, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -48.79344177246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3007569313049316, + "rewards_train/margins": -0.9714127779006958, + "rewards_train/rejected": -1.3293441534042358, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -86.98884582519531, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -164.2276153564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1988846063613892, + "rewards_train/margins": 4.423877120018005, + "rewards_train/rejected": -5.6227617263793945, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -198.1874237060547, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -349.6029052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.668742179870605, + "rewards_train/margins": 11.891549110412598, + "rewards_train/rejected": -21.560291290283203, + "step": 1925 + }, + { + "epoch": 0.54, + "logps_train/chosen": -21.16118049621582, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -43.11587905883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4161180257797241, + "rewards_train/margins": 0.9954699277877808, + "rewards_train/rejected": -2.411587953567505, + "step": 1925 + }, + { + "epoch": 0.54, + "learning_rate": 2.7466356920660036e-07, + "loss": 0.2946, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -73.93277740478516, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -188.22293090820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3432778120040894, + "rewards_train/margins": 5.879015564918518, + "rewards_train/rejected": -7.222293376922607, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -17.980152130126953, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -34.37532043457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4230152368545532, + "rewards_train/margins": 1.0895167589187622, + "rewards_train/rejected": -2.5125319957733154, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -27.559247970581055, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -27.21221160888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.321549892425537, + "rewards_train/margins": 0.013733863830566406, + "rewards_train/rejected": -2.3352837562561035, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -106.62771606445312, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -227.72845458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.662771701812744, + "rewards_train/margins": 5.510074138641357, + "rewards_train/rejected": -9.172845840454102, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -9.130465507507324, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -14.961421966552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6739840507507324, + "rewards_train/margins": 0.3440331220626831, + "rewards_train/rejected": -1.0180171728134155, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -148.40350341796875, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -180.52317810058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.390350341796875, + "rewards_train/margins": 1.4619674682617188, + "rewards_train/rejected": -4.852317810058594, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -66.20465087890625, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -29.923038482666016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4954651594161987, + "rewards_train/margins": 0.14683866500854492, + "rewards_train/rejected": -1.6423038244247437, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -21.05368423461914, + "logps_train/ref_chosen": -1.09375, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -30.000993728637695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9959934949874878, + "rewards_train/margins": 0.6619185209274292, + "rewards_train/rejected": -2.657912015914917, + "step": 1926 + }, + { + "epoch": 0.54, + "logps_train/chosen": -114.03543090820312, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -120.06326293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.90354323387146, + "rewards_train/margins": 0.6527831554412842, + "rewards_train/rejected": -3.556326389312744, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -24.978605270385742, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -50.68967056274414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1541105508804321, + "rewards_train/margins": 3.186731696128845, + "rewards_train/rejected": -4.340842247009277, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -115.65444946289062, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -190.781005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.715445041656494, + "rewards_train/margins": 4.762655735015869, + "rewards_train/rejected": -8.478100776672363, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.125682830810547, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -39.783058166503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26256829500198364, + "rewards_train/margins": 3.0344876646995544, + "rewards_train/rejected": -3.297055959701538, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -32.91552734375, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -40.70634460449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.791552722454071, + "rewards_train/margins": 1.3415818810462952, + "rewards_train/rejected": -2.133134603500366, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -73.5826416015625, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -30.067493438720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.483264207839966, + "rewards_train/margins": -2.1890147924423218, + "rewards_train/rejected": -1.294249415397644, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -127.33328247070312, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -99.56269836425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8333282470703125, + "rewards_train/margins": 0.07294166088104248, + "rewards_train/rejected": -1.906269907951355, + "step": 1927 + }, + { + "epoch": 0.54, + "logps_train/chosen": -133.54896545410156, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -174.46556091308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.054896831512451, + "rewards_train/margins": 3.291659355163574, + "rewards_train/rejected": -7.346556186676025, + "step": 1927 + }, + { + "epoch": 0.54, + "learning_rate": 2.7284492623531595e-07, + "loss": 0.4064, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -164.67276000976562, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -144.92623901367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.3172760009765625, + "rewards_train/margins": -1.7746520042419434, + "rewards_train/rejected": -3.542623996734619, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -33.464927673339844, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -33.31550216674805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4839928150177, + "rewards_train/margins": -0.08369255065917969, + "rewards_train/rejected": -2.4003002643585205, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -32.035736083984375, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -35.26201248168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9660736322402954, + "rewards_train/margins": 1.047627568244934, + "rewards_train/rejected": -3.0137012004852295, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -26.256019592285156, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -11.14360237121582, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6318520307540894, + "rewards_train/margins": -0.7253043055534363, + "rewards_train/rejected": -0.9065477252006531, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -217.968017578125, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -218.37326049804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.5968017578125, + "rewards_train/margins": 0.04052448272705078, + "rewards_train/rejected": -6.637326240539551, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -147.6256103515625, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -129.4764404296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.362560987472534, + "rewards_train/margins": -0.06491684913635254, + "rewards_train/rejected": -3.2976441383361816, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -11.085184097290039, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -20.358963012695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32726842164993286, + "rewards_train/margins": 1.2461279034614563, + "rewards_train/rejected": -1.5733963251113892, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -24.69562339782715, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -11.4375, + "logps_train/rejected": -35.301795959472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6195623278617859, + "rewards_train/margins": 1.766867220401764, + "rewards_train/rejected": -2.38642954826355, + "step": 1928 + }, + { + "epoch": 0.54, + "logps_train/chosen": -216.5424346923828, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -227.5808868408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.554244041442871, + "rewards_train/margins": 0.20384502410888672, + "rewards_train/rejected": -8.758089065551758, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -65.00421905517578, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -199.8137664794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.175421953201294, + "rewards_train/margins": 7.305954694747925, + "rewards_train/rejected": -8.481376647949219, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -100.450927734375, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -147.46029663085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.045092821121216, + "rewards_train/margins": 2.8009369373321533, + "rewards_train/rejected": -5.846029758453369, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -152.26943969726562, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -195.67251586914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.576943874359131, + "rewards_train/margins": 2.4903082847595215, + "rewards_train/rejected": -10.067252159118652, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -97.75369262695312, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -105.72767639160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.77536940574646, + "rewards_train/margins": 0.9473981857299805, + "rewards_train/rejected": -3.7227675914764404, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -63.663246154785156, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -127.69374084472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24132461845874786, + "rewards_train/margins": 1.4780494421720505, + "rewards_train/rejected": -1.7193740606307983, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -39.19733810424805, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -33.93385696411133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7572338581085205, + "rewards_train/margins": 0.10490190982818604, + "rewards_train/rejected": -1.8621357679367065, + "step": 1929 + }, + { + "epoch": 0.54, + "logps_train/chosen": -23.666526794433594, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -36.418731689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1666526794433594, + "rewards_train/margins": 0.22522056102752686, + "rewards_train/rejected": -1.3918732404708862, + "step": 1929 + }, + { + "epoch": 0.54, + "learning_rate": 2.7103137257858863e-07, + "loss": 0.5247, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -30.7926025390625, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -34.28776168823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.307385206222534, + "rewards_train/margins": 0.6776409149169922, + "rewards_train/rejected": -2.9850261211395264, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.070667266845703, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -20.31515121459961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5633167624473572, + "rewards_train/margins": 0.46819835901260376, + "rewards_train/rejected": -1.031515121459961, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -177.55433654785156, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -232.63218688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.355433940887451, + "rewards_train/margins": 1.8077845573425293, + "rewards_train/rejected": -8.16321849822998, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -158.69265747070312, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -270.0331115722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0692657232284546, + "rewards_train/margins": 8.334046006202698, + "rewards_train/rejected": -9.403311729431152, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -140.2646942138672, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -241.88348388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9764693975448608, + "rewards_train/margins": 7.711879372596741, + "rewards_train/rejected": -9.688348770141602, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -174.3480224609375, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -176.76934814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.384802341461182, + "rewards_train/margins": 0.5421323776245117, + "rewards_train/rejected": -6.926934719085693, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -91.03715515136719, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -231.2255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5037155151367188, + "rewards_train/margins": 11.018843650817871, + "rewards_train/rejected": -11.52255916595459, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -23.315353393554688, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -21.542558670043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2065353393554688, + "rewards_train/margins": 0.7367830276489258, + "rewards_train/rejected": -1.9433183670043945, + "step": 1930 + }, + { + "epoch": 0.54, + "logps_train/chosen": -54.376014709472656, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -29.092798233032227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.90010142326355, + "rewards_train/margins": -1.0470715761184692, + "rewards_train/rejected": -1.8530298471450806, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.4737548828125, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -29.942996978759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25987550616264343, + "rewards_train/margins": 1.4844242632389069, + "rewards_train/rejected": -1.7442997694015503, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -111.87648010253906, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -156.55075073242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4876480102539062, + "rewards_train/margins": 1.4674270153045654, + "rewards_train/rejected": -3.9550750255584717, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -42.35036087036133, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -88.76077270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.497536063194275, + "rewards_train/margins": 2.503541111946106, + "rewards_train/rejected": -4.001077175140381, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -95.83036804199219, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -205.273681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.033036947250366, + "rewards_train/margins": 8.24433159828186, + "rewards_train/rejected": -11.277368545532227, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -90.3711929321289, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -84.92672729492188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6371192932128906, + "rewards_train/margins": -1.7444465681910515, + "rewards_train/rejected": 0.10732727497816086, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -169.18731689453125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -114.02217864990234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.418731689453125, + "rewards_train/margins": -0.46651387214660645, + "rewards_train/rejected": -2.9522178173065186, + "step": 1931 + }, + { + "epoch": 0.54, + "logps_train/chosen": -15.34530258178711, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -23.847087860107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2829678058624268, + "rewards_train/margins": 0.2892409563064575, + "rewards_train/rejected": -1.5722087621688843, + "step": 1931 + }, + { + "epoch": 0.54, + "learning_rate": 2.692229209293707e-07, + "loss": 0.4473, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -271.63214111328125, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -226.31117248535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.463213920593262, + "rewards_train/margins": -2.432096481323242, + "rewards_train/rejected": -8.03111743927002, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -96.10493469238281, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -112.49304962158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9604934453964233, + "rewards_train/margins": 3.4888116121292114, + "rewards_train/rejected": -5.449305057525635, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -22.502674102783203, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -26.119422912597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2940174341201782, + "rewards_train/margins": 0.46167492866516113, + "rewards_train/rejected": -1.7556923627853394, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -192.51275634765625, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -256.42095947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.501276016235352, + "rewards_train/margins": 1.4408197402954102, + "rewards_train/rejected": -9.942095756530762, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -100.60139465332031, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -211.0, + "logps_train/rejected": -239.23403930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1601394414901733, + "rewards_train/margins": 1.6632646322250366, + "rewards_train/rejected": -2.82340407371521, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -178.08999633789062, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -194.72445678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.608999729156494, + "rewards_train/margins": 1.5634465217590332, + "rewards_train/rejected": -9.172446250915527, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -105.74857330322266, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -210.14227294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.499857425689697, + "rewards_train/margins": 4.914370059967041, + "rewards_train/rejected": -10.414227485656738, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -152.9250030517578, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -228.07830810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.742500305175781, + "rewards_train/margins": 2.2653303146362305, + "rewards_train/rejected": -8.007830619812012, + "step": 1932 + }, + { + "epoch": 0.54, + "logps_train/chosen": -122.80470275878906, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -221.23171997070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.93047034740448, + "rewards_train/margins": 6.792701840400696, + "rewards_train/rejected": -8.723172187805176, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -139.66830444335938, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -126.29150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06683044880628586, + "rewards_train/margins": 2.01231998950243, + "rewards_train/rejected": -2.079150438308716, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -166.30926513671875, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -216.928955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.130926609039307, + "rewards_train/margins": 4.961968898773193, + "rewards_train/rejected": -11.0928955078125, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -16.660764694213867, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -17.44730567932129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3738889694213867, + "rewards_train/margins": 0.05521667003631592, + "rewards_train/rejected": -1.4291056394577026, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -73.90219116210938, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -104.90525817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9902191162109375, + "rewards_train/margins": 2.550306797027588, + "rewards_train/rejected": -3.5405259132385254, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -135.12005615234375, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -92.32328796386719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.012005567550659, + "rewards_train/margins": -0.4796767234802246, + "rewards_train/rejected": -1.5323288440704346, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -26.357982635498047, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -93.37608337402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.295173406600952, + "rewards_train/margins": -0.007565021514892578, + "rewards_train/rejected": -2.2876083850860596, + "step": 1933 + }, + { + "epoch": 0.54, + "logps_train/chosen": -65.76962280273438, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -135.5806884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4019622802734375, + "rewards_train/margins": 2.40610671043396, + "rewards_train/rejected": -2.8080689907073975, + "step": 1933 + }, + { + "epoch": 0.54, + "learning_rate": 2.674195839449067e-07, + "loss": 0.3958, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -163.52182006835938, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -58.744422912597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.952182292938232, + "rewards_train/margins": -4.677739977836609, + "rewards_train/rejected": -1.2744423151016235, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -97.53284454345703, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -88.4158706665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5532844662666321, + "rewards_train/margins": 0.9883026480674744, + "rewards_train/rejected": -1.5415871143341064, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -210.67770385742188, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -243.0, + "logps_train/rejected": -374.65447998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.067770481109619, + "rewards_train/margins": 9.09767770767212, + "rewards_train/rejected": -13.165448188781738, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -63.568626403808594, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -100.14213562011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043137360364198685, + "rewards_train/margins": 1.4573508985340595, + "rewards_train/rejected": -1.4142135381698608, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -12.811856269836426, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -9.926027297973633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0850919485092163, + "rewards_train/margins": -1.042489219456911, + "rewards_train/rejected": -0.04260272905230522, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -100.34297943115234, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -71.77568054199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8842979669570923, + "rewards_train/margins": 1.6432701349258423, + "rewards_train/rejected": -3.5275681018829346, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -52.1856575012207, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -66.04325103759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.556065797805786, + "rewards_train/margins": 2.1982595920562744, + "rewards_train/rejected": -4.7543253898620605, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -115.4555892944336, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -169.191162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7955589294433594, + "rewards_train/margins": 7.023557662963867, + "rewards_train/rejected": -8.819116592407227, + "step": 1934 + }, + { + "epoch": 0.54, + "logps_train/chosen": -106.72386169433594, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -165.71685791015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.972386121749878, + "rewards_train/margins": -1.400700330734253, + "rewards_train/rejected": -1.571685791015625, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -21.22704315185547, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -41.85499954223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5352043509483337, + "rewards_train/margins": 1.725295603275299, + "rewards_train/rejected": -2.260499954223633, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.496322631835938, + "logps_train/ref_chosen": -1.3671875, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -30.364532470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9129135608673096, + "rewards_train/margins": 0.5454146862030029, + "rewards_train/rejected": -2.4583282470703125, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -42.87541580200195, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -44.83637619018555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2375415563583374, + "rewards_train/margins": 0.6960960626602173, + "rewards_train/rejected": -1.9336376190185547, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -136.06747436523438, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -151.4033660888672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.656747341156006, + "rewards_train/margins": -0.4164104461669922, + "rewards_train/rejected": -4.240336894989014, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -14.371397972106934, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -23.64588165283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2433898001909256, + "rewards_train/margins": 0.8649484366178513, + "rewards_train/rejected": -1.1083382368087769, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -166.5023651123047, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -169.6831817626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6502366065979004, + "rewards_train/margins": 5.368081569671631, + "rewards_train/rejected": -8.018318176269531, + "step": 1935 + }, + { + "epoch": 0.54, + "logps_train/chosen": -165.4762420654297, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -245.1161651611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.697624206542969, + "rewards_train/margins": 4.613992691040039, + "rewards_train/rejected": -11.311616897583008, + "step": 1935 + }, + { + "epoch": 0.54, + "learning_rate": 2.656213742466432e-07, + "loss": 0.6734, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -94.11540222167969, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -122.22370910644531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.136540174484253, + "rewards_train/margins": 1.0858309268951416, + "rewards_train/rejected": -4.2223711013793945, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -11.95697021484375, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -25.513378143310547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.542572021484375, + "rewards_train/margins": 0.7712658643722534, + "rewards_train/rejected": -1.3138378858566284, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -109.93386840820312, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -173.86561584472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0933868885040283, + "rewards_train/margins": 6.143174886703491, + "rewards_train/rejected": -8.23656177520752, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.61182975769043, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -30.325822830200195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8049330115318298, + "rewards_train/margins": 1.8385868668556213, + "rewards_train/rejected": -2.643519878387451, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -122.73346710205078, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -192.14993286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.723346710205078, + "rewards_train/margins": 5.341647148132324, + "rewards_train/rejected": -8.064993858337402, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -37.21175003051758, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -34.945770263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0211751461029053, + "rewards_train/margins": 0.2984018325805664, + "rewards_train/rejected": -2.3195769786834717, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -269.9503479003906, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -245.93460083007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.795035362243652, + "rewards_train/margins": -1.2515754699707031, + "rewards_train/rejected": -12.54345989227295, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.524024963378906, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -15.925680160522461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5774025321006775, + "rewards_train/margins": 0.41829049587249756, + "rewards_train/rejected": -0.995693027973175, + "step": 1936 + }, + { + "epoch": 0.54, + "logps_train/chosen": -9.454797744750977, + "logps_train/ref_chosen": -4.625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -27.834007263183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48297977447509766, + "rewards_train/margins": 1.2691709995269775, + "rewards_train/rejected": -1.7521507740020752, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -78.87973022460938, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -30.825546264648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3629729747772217, + "rewards_train/margins": -0.8366683721542358, + "rewards_train/rejected": -1.5263046026229858, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -253.13893127441406, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -213.42477416992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.31389331817627, + "rewards_train/margins": -1.7714157104492188, + "rewards_train/rejected": -8.54247760772705, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -18.87638282775879, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -20.78742027282715, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4032633304595947, + "rewards_train/margins": -0.424521267414093, + "rewards_train/rejected": -0.9787420630455017, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -125.21477508544922, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -103.75259399414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9214775562286377, + "rewards_train/margins": 1.4037818908691406, + "rewards_train/rejected": -3.3252594470977783, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -4.338189601898193, + "logps_train/ref_chosen": -1.9765625, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -7.273288249969482, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2361627072095871, + "rewards_train/margins": 0.04116611182689667, + "rewards_train/rejected": -0.27732881903648376, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -135.64186096191406, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -109.3433609008789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0641860961914062, + "rewards_train/margins": 0.07015013694763184, + "rewards_train/rejected": -2.134336233139038, + "step": 1937 + }, + { + "epoch": 0.54, + "logps_train/chosen": -113.35704040527344, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -218.428466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6357040405273438, + "rewards_train/margins": 7.407142639160156, + "rewards_train/rejected": -11.0428466796875, + "step": 1937 + }, + { + "epoch": 0.54, + "learning_rate": 2.6382830442014204e-07, + "loss": 0.5773, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -117.94120788574219, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -141.5003204345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9941208362579346, + "rewards_train/margins": 1.255911111831665, + "rewards_train/rejected": -4.2500319480896, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -24.26717758178711, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -30.662567138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7517178058624268, + "rewards_train/margins": 1.008288860321045, + "rewards_train/rejected": -2.7600066661834717, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -16.202268600463867, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -25.562654495239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3264769315719604, + "rewards_train/margins": 0.32353854179382324, + "rewards_train/rejected": -1.6500154733657837, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -19.183391571044922, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -37.30757141113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2683391571044922, + "rewards_train/margins": 0.612417995929718, + "rewards_train/rejected": -0.8807571530342102, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -9.128889083862305, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -78.67872619628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33476391434669495, + "rewards_train/margins": 3.908108800649643, + "rewards_train/rejected": -4.242872714996338, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -78.21331787109375, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -56.46294021606445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.183831691741943, + "rewards_train/margins": -0.27816247940063477, + "rewards_train/rejected": -4.905669212341309, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -36.22684860229492, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -39.41973114013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.13518488407135, + "rewards_train/margins": 2.363038182258606, + "rewards_train/rejected": -3.498223066329956, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -177.5196075439453, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -236.0282745361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.8019609451293945, + "rewards_train/margins": 0.0008664131164550781, + "rewards_train/rejected": -7.80282735824585, + "step": 1938 + }, + { + "epoch": 0.54, + "logps_train/chosen": -4.60321044921875, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -0.6171875, + "logps_train/rejected": -1.5163042545318604, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.22125855088233948, + "rewards_train/margins": -0.13134687393903732, + "rewards_train/rejected": -0.08991167694330215, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -141.38296508789062, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -205.9195098876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.638296604156494, + "rewards_train/margins": 1.9536542892456055, + "rewards_train/rejected": -7.5919508934021, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -304.6521911621094, + "logps_train/ref_chosen": -213.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -212.59063720703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.1652193069458, + "rewards_train/margins": -1.006155014038086, + "rewards_train/rejected": -8.159064292907715, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -94.81690979003906, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -114.12015533447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4816909730434418, + "rewards_train/margins": 4.680324465036392, + "rewards_train/rejected": -5.162015438079834, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -44.29088592529297, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -63.09614181518555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4040887355804443, + "rewards_train/margins": 1.6055257320404053, + "rewards_train/rejected": -4.00961446762085, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -245.8721923828125, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -203.4705047607422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.08721923828125, + "rewards_train/margins": -0.24016857147216797, + "rewards_train/rejected": -6.847050666809082, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -148.416015625, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -146.23031616210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.091601848602295, + "rewards_train/margins": 0.2814297676086426, + "rewards_train/rejected": -4.3730316162109375, + "step": 1939 + }, + { + "epoch": 0.54, + "logps_train/chosen": -123.28834533691406, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -193.7874755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1288344860076904, + "rewards_train/margins": 3.049913167953491, + "rewards_train/rejected": -5.178747653961182, + "step": 1939 + }, + { + "epoch": 0.54, + "learning_rate": 2.6204038701499053e-07, + "loss": 0.4384, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -29.620264053344727, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -38.71434020996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6807764768600464, + "rewards_train/margins": 1.3406575918197632, + "rewards_train/rejected": -3.0214340686798096, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -263.019287109375, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -255.23681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.401928901672363, + "rewards_train/margins": 0.4217529296875, + "rewards_train/rejected": -11.823681831359863, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -16.990999221801758, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -26.75, + "logps_train/rejected": -49.001258850097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15090008080005646, + "rewards_train/margins": 2.3760261088609695, + "rewards_train/rejected": -2.225126028060913, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -217.50743103027344, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -199.0263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.550743103027344, + "rewards_train/margins": -0.34810638427734375, + "rewards_train/rejected": -7.20263671875, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -16.687673568725586, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -49.39807891845703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1250174045562744, + "rewards_train/margins": -0.8602094948291779, + "rewards_train/rejected": -0.26480790972709656, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -51.24109649658203, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -57.443565368652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7741097211837769, + "rewards_train/margins": 2.0577467679977417, + "rewards_train/rejected": -3.8318564891815186, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -174.43618774414062, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -129.79791259765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.4436187744140625, + "rewards_train/margins": -0.013827323913574219, + "rewards_train/rejected": -4.429791450500488, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -74.79817199707031, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -141.8458251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.879817247390747, + "rewards_train/margins": 2.204765558242798, + "rewards_train/rejected": -4.084582805633545, + "step": 1940 + }, + { + "epoch": 0.54, + "logps_train/chosen": -17.984041213989258, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -24.63937759399414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0984041690826416, + "rewards_train/margins": -1.109466410242021, + "rewards_train/rejected": 0.011062241159379482, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -94.70073699951172, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -151.572021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0700738430023193, + "rewards_train/margins": 3.787128210067749, + "rewards_train/rejected": -5.857202053070068, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -101.39196014404297, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -178.37451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16080398857593536, + "rewards_train/margins": 4.898255065083504, + "rewards_train/rejected": -4.737451076507568, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.083816528320312, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -7.704422950744629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13963165879249573, + "rewards_train/margins": 0.002685636281967163, + "rewards_train/rejected": -0.1423172950744629, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -104.23109436035156, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -194.291259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.973109483718872, + "rewards_train/margins": 5.506016492843628, + "rewards_train/rejected": -7.4791259765625, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -161.64535522460938, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -215.9086456298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1645355224609375, + "rewards_train/margins": 4.226329326629639, + "rewards_train/rejected": -6.390864849090576, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -149.49093627929688, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -221.16546630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.799093723297119, + "rewards_train/margins": 4.917453289031982, + "rewards_train/rejected": -8.716547012329102, + "step": 1941 + }, + { + "epoch": 0.54, + "logps_train/chosen": -4.068273067474365, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -0.4140625, + "logps_train/rejected": -0.6670785546302795, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15526480972766876, + "rewards_train/margins": -0.1299632042646408, + "rewards_train/rejected": -0.025301605463027954, + "step": 1941 + }, + { + "epoch": 0.54, + "learning_rate": 2.6025763454471473e-07, + "loss": 0.4215, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -145.08734130859375, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -202.35357666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2087342739105225, + "rewards_train/margins": 3.026623487472534, + "rewards_train/rejected": -6.235357761383057, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -108.65088653564453, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -181.31295776367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08491134643554688, + "rewards_train/margins": 6.416207313537598, + "rewards_train/rejected": -6.331295967102051, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -138.03695678710938, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -202.36659240722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3536957502365112, + "rewards_train/margins": 2.482963442802429, + "rewards_train/rejected": -3.8366591930389404, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -45.17661666870117, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -29.315914154052734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.855161666870117, + "rewards_train/margins": -1.5173202753067017, + "rewards_train/rejected": -1.3378413915634155, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -127.88882446289062, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -169.21920776367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9888824224472046, + "rewards_train/margins": 1.733038306236267, + "rewards_train/rejected": -3.7219207286834717, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -198.35939025878906, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -184.817138671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.235939025878906, + "rewards_train/margins": -0.6542248725891113, + "rewards_train/rejected": -5.581714153289795, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -12.838210105895996, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -12.822932243347168, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17757101356983185, + "rewards_train/margins": -0.0015277862548828125, + "rewards_train/rejected": -0.17604322731494904, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -83.2497787475586, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -127.85655975341797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7249779105186462, + "rewards_train/margins": 2.3106780648231506, + "rewards_train/rejected": -3.035655975341797, + "step": 1942 + }, + { + "epoch": 0.54, + "logps_train/chosen": -20.555831909179688, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -41.0, + "logps_train/rejected": -50.022891998291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3305831849575043, + "rewards_train/margins": 0.5717060267925262, + "rewards_train/rejected": -0.9022892117500305, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -41.25959396362305, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -44.2771110534668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.597834348678589, + "rewards_train/margins": 0.4048769474029541, + "rewards_train/rejected": -4.002711296081543, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -304.6595764160156, + "logps_train/ref_chosen": -226.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -253.68792724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.865957736968994, + "rewards_train/margins": 0.4028353691101074, + "rewards_train/rejected": -8.268793106079102, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -27.951858520507812, + "logps_train/ref_chosen": -19.0, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -43.8231086730957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8951858878135681, + "rewards_train/margins": 2.4933751225471497, + "rewards_train/rejected": -3.3885610103607178, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -190.14349365234375, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -182.49896240234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.864349365234375, + "rewards_train/margins": -1.2644529342651367, + "rewards_train/rejected": -5.599896430969238, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -176.93447875976562, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -208.123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.743447780609131, + "rewards_train/margins": 1.068857192993164, + "rewards_train/rejected": -7.812304973602295, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -176.15139770507812, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -140.96266174316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.265140056610107, + "rewards_train/margins": 0.7811260223388672, + "rewards_train/rejected": -7.046266078948975, + "step": 1943 + }, + { + "epoch": 0.54, + "logps_train/chosen": -44.472450256347656, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -38.41651916503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8222450613975525, + "rewards_train/margins": -0.018093109130859375, + "rewards_train/rejected": -0.8041519522666931, + "step": 1943 + }, + { + "epoch": 0.54, + "learning_rate": 2.584800594866919e-07, + "loss": 0.5191, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.748929977416992, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -1.0078125, + "logps_train/rejected": -0.4097628593444824, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21239300072193146, + "rewards_train/margins": -0.2721979655325413, + "rewards_train/rejected": 0.05980496481060982, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -5.269646644592285, + "logps_train/ref_chosen": -1.7265625, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -7.206389904022217, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.35430842638015747, + "rewards_train/margins": -0.08991941809654236, + "rewards_train/rejected": -0.2643890082836151, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -15.188867568969727, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -52.642433166503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7126367688179016, + "rewards_train/margins": 3.214106500148773, + "rewards_train/rejected": -3.926743268966675, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -264.69427490234375, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -273.0202331542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -14.069427490234375, + "rewards_train/margins": 0.23259639739990234, + "rewards_train/rejected": -14.302023887634277, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -95.26054382324219, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -237.68324279785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4760544300079346, + "rewards_train/margins": 10.392269849777222, + "rewards_train/rejected": -11.868324279785156, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -57.963844299316406, + "logps_train/ref_chosen": -44.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -104.08135223388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3463844060897827, + "rewards_train/margins": 1.711750864982605, + "rewards_train/rejected": -3.0581352710723877, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -107.51899719238281, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -82.74807739257812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.651899814605713, + "rewards_train/margins": -0.7770920991897583, + "rewards_train/rejected": -1.8748077154159546, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -194.99000549316406, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -261.53082275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.199000597000122, + "rewards_train/margins": 4.854081869125366, + "rewards_train/rejected": -8.053082466125488, + "step": 1944 + }, + { + "epoch": 0.54, + "logps_train/chosen": -155.54849243164062, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -152.3988037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.604849338531494, + "rewards_train/margins": 2.785031318664551, + "rewards_train/rejected": -6.389880657196045, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -53.99940490722656, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -54.254066467285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4249404966831207, + "rewards_train/margins": 0.02546614408493042, + "rewards_train/rejected": -0.45040664076805115, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -179.31301879882812, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -223.9776153564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.6313018798828125, + "rewards_train/margins": 1.0664596557617188, + "rewards_train/rejected": -7.697761535644531, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -113.17935180664062, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -163.13279724121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1679351329803467, + "rewards_train/margins": 3.3953444957733154, + "rewards_train/rejected": -6.563279628753662, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -168.77484130859375, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -299.0810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9774842262268066, + "rewards_train/margins": 8.730621814727783, + "rewards_train/rejected": -12.70810604095459, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -106.4904556274414, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -143.4405517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4990456104278564, + "rewards_train/margins": 0.9950096607208252, + "rewards_train/rejected": -2.4940552711486816, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -88.11395263671875, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -92.84708404541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.486395359039307, + "rewards_train/margins": 1.223313331604004, + "rewards_train/rejected": -5.7097086906433105, + "step": 1945 + }, + { + "epoch": 0.54, + "logps_train/chosen": -32.47724914550781, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -87.85417938232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5195999145507812, + "rewards_train/margins": 2.8908181190490723, + "rewards_train/rejected": -5.4104180335998535, + "step": 1945 + }, + { + "epoch": 0.54, + "learning_rate": 2.567076742820633e-07, + "loss": 0.3266, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -104.43235778808594, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -232.50921630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.393235921859741, + "rewards_train/margins": 7.55768609046936, + "rewards_train/rejected": -9.950922012329102, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -9.473995208740234, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -11.019950866699219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4473995268344879, + "rewards_train/margins": -0.29540443420410156, + "rewards_train/rejected": -0.15199509263038635, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -32.67877197265625, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -72.64997863769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4116272926330566, + "rewards_train/margins": 2.9158706665039062, + "rewards_train/rejected": -5.327497959136963, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -7.9186577796936035, + "logps_train/ref_chosen": -1.546875, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -28.572017669677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6371783018112183, + "rewards_train/margins": 1.376273512840271, + "rewards_train/rejected": -2.0134518146514893, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -33.19165802001953, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -27.6021671295166, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8941658139228821, + "rewards_train/margins": -0.5089491009712219, + "rewards_train/rejected": -0.38521671295166016, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -148.72653198242188, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -154.56512451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4226531982421875, + "rewards_train/margins": 2.633859157562256, + "rewards_train/rejected": -5.056512355804443, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -16.141355514526367, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -34.05024719238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4141355752944946, + "rewards_train/margins": 1.6815141439437866, + "rewards_train/rejected": -3.0956497192382812, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -288.4139404296875, + "logps_train/ref_chosen": -205.0, + "logps_train/ref_rejected": -226.0, + "logps_train/rejected": -335.85748291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.341394424438477, + "rewards_train/margins": 2.6443538665771484, + "rewards_train/rejected": -10.985748291015625, + "step": 1946 + }, + { + "epoch": 0.54, + "logps_train/chosen": -69.252685546875, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -98.5749740600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2002686262130737, + "rewards_train/margins": 0.6572288274765015, + "rewards_train/rejected": -1.8574974536895752, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -165.1859893798828, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -211.27491760253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5185989737510681, + "rewards_train/margins": 5.108892977237701, + "rewards_train/rejected": -5.6274919509887695, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -24.63299560546875, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -30.763324737548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2984559535980225, + "rewards_train/margins": 0.07162666320800781, + "rewards_train/rejected": -2.3700826168060303, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -11.561814308166504, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -25.906423568725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6968064308166504, + "rewards_train/margins": 1.4688360691070557, + "rewards_train/rejected": -2.165642499923706, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -115.92877197265625, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -138.39028930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.542877197265625, + "rewards_train/margins": 0.5461517572402954, + "rewards_train/rejected": -1.0890289545059204, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -21.982620239257812, + "logps_train/ref_chosen": -2.15625, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -34.94253158569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9826370477676392, + "rewards_train/margins": 1.1991161108016968, + "rewards_train/rejected": -3.181753158569336, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -104.97410583496094, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -159.63400268554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3974106311798096, + "rewards_train/margins": 3.3159897327423096, + "rewards_train/rejected": -5.713400363922119, + "step": 1947 + }, + { + "epoch": 0.54, + "logps_train/chosen": -138.00900268554688, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -153.25379943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9009002447128296, + "rewards_train/margins": 3.5244799852371216, + "rewards_train/rejected": -5.425380229949951, + "step": 1947 + }, + { + "epoch": 0.54, + "learning_rate": 2.5494049133564576e-07, + "loss": 0.2807, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -7.500511169433594, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -15.724952697753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5063011050224304, + "rewards_train/margins": 0.677131712436676, + "rewards_train/rejected": -1.1834328174591064, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -10.436371803283691, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -3.765625, + "logps_train/rejected": -15.52454662322998, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31551218032836914, + "rewards_train/margins": 0.8603800535202026, + "rewards_train/rejected": -1.1758922338485718, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -52.80146026611328, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -65.12496948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.892646074295044, + "rewards_train/margins": 1.5261008739471436, + "rewards_train/rejected": -5.4187469482421875, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -0.977030336856842, + "logps_train/ref_chosen": -1.828125, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -13.720291137695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08510946482419968, + "rewards_train/margins": 0.45088859647512436, + "rewards_train/rejected": -0.3657791316509247, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -5.526616096496582, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -4.8264641761779785, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18391160666942596, + "rewards_train/margins": -0.129390187561512, + "rewards_train/rejected": -0.05452141910791397, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -53.64989471435547, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -32.03895568847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6274895668029785, + "rewards_train/margins": -0.6798440217971802, + "rewards_train/rejected": -1.9476455450057983, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -23.48145294189453, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -21.212331771850586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.398145318031311, + "rewards_train/margins": 0.48402535915374756, + "rewards_train/rejected": -1.8821706771850586, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -39.34687805175781, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -31.018569946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8596878051757812, + "rewards_train/margins": 0.24216914176940918, + "rewards_train/rejected": -2.1018569469451904, + "step": 1948 + }, + { + "epoch": 0.54, + "logps_train/chosen": -187.10780334472656, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -233.1049041748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.410780429840088, + "rewards_train/margins": 2.299710273742676, + "rewards_train/rejected": -5.710490703582764, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -9.652724266052246, + "logps_train/ref_chosen": -2.359375, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -19.391746520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7293349504470825, + "rewards_train/margins": 0.022339701652526855, + "rewards_train/rejected": -0.7516746520996094, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -100.75627136230469, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -126.11991882324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7756271362304688, + "rewards_train/margins": 2.036365032196045, + "rewards_train/rejected": -4.811992168426514, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -137.49755859375, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -192.69537353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.249755859375, + "rewards_train/margins": 3.7197813987731934, + "rewards_train/rejected": -5.969537258148193, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -14.816184997558594, + "logps_train/ref_chosen": -0.59765625, + "logps_train/ref_rejected": -0.59765625, + "logps_train/rejected": -14.698606491088867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.421852946281433, + "rewards_train/margins": -0.011757850646972656, + "rewards_train/rejected": -1.4100950956344604, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -18.007362365722656, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -36.8680419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3507362604141235, + "rewards_train/margins": 0.5985679626464844, + "rewards_train/rejected": -1.949304223060608, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -19.344402313232422, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -37.81123352050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2094402313232422, + "rewards_train/margins": 2.3341832160949707, + "rewards_train/rejected": -2.543623447418213, + "step": 1949 + }, + { + "epoch": 0.54, + "logps_train/chosen": -187.60691833496094, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -98.39959716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.110692024230957, + "rewards_train/margins": -4.620732307434082, + "rewards_train/rejected": -2.489959716796875, + "step": 1949 + }, + { + "epoch": 0.55, + "learning_rate": 2.5317852301584643e-07, + "loss": 0.6966, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -0.0610969141125679, + "logps_train/ref_chosen": -0.32421875, + "logps_train/ref_rejected": -2.6875, + "logps_train/rejected": -3.9324684143066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02631218545138836, + "rewards_train/margins": 0.1508090253919363, + "rewards_train/rejected": -0.12449683994054794, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -41.91722106933594, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -41.67875671386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9104721546173096, + "rewards_train/margins": 0.5480284690856934, + "rewards_train/rejected": -3.458500623703003, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.63150405883789, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -45.08722686767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0756504088640213, + "rewards_train/margins": 2.989322230219841, + "rewards_train/rejected": -3.0649726390838623, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -38.77107238769531, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -31.773500442504883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6771072745323181, + "rewards_train/margins": 1.6877427697181702, + "rewards_train/rejected": -2.3648500442504883, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -12.252614974975586, + "logps_train/ref_chosen": -1.90625, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -38.26902389526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0346364974975586, + "rewards_train/margins": 2.2922658920288086, + "rewards_train/rejected": -3.326902389526367, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -91.89973449707031, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -119.96269989013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.289973497390747, + "rewards_train/margins": 2.8062965869903564, + "rewards_train/rejected": -5.0962700843811035, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -122.52996826171875, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -201.93548583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.452996730804443, + "rewards_train/margins": 6.7405524253845215, + "rewards_train/rejected": -12.193549156188965, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -143.23565673828125, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -313.3440856933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.173565864562988, + "rewards_train/margins": 6.260843276977539, + "rewards_train/rejected": -12.434409141540527, + "step": 1950 + }, + { + "epoch": 0.55, + "logps_train/chosen": -164.15887451171875, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -227.10467529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.315887451171875, + "rewards_train/margins": 6.09458065032959, + "rewards_train/rejected": -8.410468101501465, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -17.186962127685547, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -31.9926700592041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7749462127685547, + "rewards_train/margins": 1.9211957454681396, + "rewards_train/rejected": -2.6961419582366943, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -79.72346496582031, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -75.37162780761719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.222346544265747, + "rewards_train/margins": -0.08518373966217041, + "rewards_train/rejected": -1.1371628046035767, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -38.79399490356445, + "logps_train/ref_chosen": -6.375, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -41.55609893798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2418994903564453, + "rewards_train/margins": 0.3480854034423828, + "rewards_train/rejected": -3.589984893798828, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -231.42474365234375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -197.8754119873047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.542474746704102, + "rewards_train/margins": -2.4549331665039062, + "rewards_train/rejected": -9.087541580200195, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -37.848304748535156, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -30.008441925048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.922330617904663, + "rewards_train/margins": -0.3105487823486328, + "rewards_train/rejected": -2.6117818355560303, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -130.23062133789062, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -177.5639190673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7230621576309204, + "rewards_train/margins": 3.733329653739929, + "rewards_train/rejected": -4.45639181137085, + "step": 1951 + }, + { + "epoch": 0.55, + "logps_train/chosen": -123.08993530273438, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -221.3792724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8589935302734375, + "rewards_train/margins": 8.478934288024902, + "rewards_train/rejected": -9.33792781829834, + "step": 1951 + }, + { + "epoch": 0.55, + "learning_rate": 2.5142178165457495e-07, + "loss": 0.3927, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -218.85842895507812, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -371.419189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0858428478240967, + "rewards_train/margins": 14.456076860427856, + "rewards_train/rejected": -17.541919708251953, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -14.098029136657715, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -2.921875, + "logps_train/rejected": -17.509342193603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9160529375076294, + "rewards_train/margins": 0.5426938533782959, + "rewards_train/rejected": -1.4587467908859253, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -3.3992109298706055, + "logps_train/ref_chosen": -3.09375, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -9.766096115112305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.030546093359589577, + "rewards_train/margins": 0.3710635360330343, + "rewards_train/rejected": -0.4016096293926239, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -25.713197708129883, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -55.053977966308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.671319842338562, + "rewards_train/margins": 2.2090779542922974, + "rewards_train/rejected": -3.8803977966308594, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -21.221134185791016, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -74.68985748291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6283634305000305, + "rewards_train/margins": 2.840622365474701, + "rewards_train/rejected": -3.4689857959747314, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.818941116333008, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -28.26482582092285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0443941354751587, + "rewards_train/margins": 0.8695884943008423, + "rewards_train/rejected": -1.913982629776001, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -24.134906768798828, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -32.89820861816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8791157007217407, + "rewards_train/margins": 0.7107051610946655, + "rewards_train/rejected": -2.5898208618164062, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -16.388809204101562, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -2.296875, + "logps_train/rejected": -15.93613338470459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4263809323310852, + "rewards_train/margins": 0.9375448822975159, + "rewards_train/rejected": -1.363925814628601, + "step": 1952 + }, + { + "epoch": 0.55, + "logps_train/chosen": -121.38093566894531, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -170.60702514648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.238093614578247, + "rewards_train/margins": 6.37260890007019, + "rewards_train/rejected": -8.610702514648438, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -72.26950073242188, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -115.13099670410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17695008218288422, + "rewards_train/margins": 2.336149588227272, + "rewards_train/rejected": -2.5130996704101562, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -2.3899521827697754, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -22.94594955444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08600478619337082, + "rewards_train/margins": 1.8149747177958488, + "rewards_train/rejected": -1.728969931602478, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -176.26376342773438, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -126.62046813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.026376485824585, + "rewards_train/margins": 1.535670518875122, + "rewards_train/rejected": -4.562047004699707, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -8.138712882995605, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -24.71494483947754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5060588121414185, + "rewards_train/margins": 0.9154356718063354, + "rewards_train/rejected": -1.421494483947754, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -44.85747146606445, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -58.745521545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4357471466064453, + "rewards_train/margins": 3.051305055618286, + "rewards_train/rejected": -3.4870522022247314, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -171.0419921875, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -196.97109985351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.004199266433716, + "rewards_train/margins": 4.94291090965271, + "rewards_train/rejected": -6.947110176086426, + "step": 1953 + }, + { + "epoch": 0.55, + "logps_train/chosen": -155.77932739257812, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -276.89617919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02206726185977459, + "rewards_train/margins": 11.41168518178165, + "rewards_train/rejected": -11.389617919921875, + "step": 1953 + }, + { + "epoch": 0.55, + "learning_rate": 2.496702795471589e-07, + "loss": 0.1909, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -94.81156921386719, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -115.10231018066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9811569452285767, + "rewards_train/margins": 3.2790740728378296, + "rewards_train/rejected": -4.260231018066406, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -128.88307189941406, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -46.736900329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6883071660995483, + "rewards_train/margins": 1.1353830099105835, + "rewards_train/rejected": -2.823690176010132, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -20.62605094909668, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -19.386274337768555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.768855094909668, + "rewards_train/margins": -0.0614776611328125, + "rewards_train/rejected": -0.7073774337768555, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -213.87428283691406, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -163.1641387939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.087428569793701, + "rewards_train/margins": 1.72898530960083, + "rewards_train/rejected": -5.816413879394531, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -208.6498260498047, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -153.09353637695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.164982795715332, + "rewards_train/margins": -1.255629062652588, + "rewards_train/rejected": -6.909353733062744, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -169.60159301757812, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -178.08956909179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4601593017578125, + "rewards_train/margins": 3.49879789352417, + "rewards_train/rejected": -7.958957195281982, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -142.43038940429688, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -166.97897338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.293039083480835, + "rewards_train/margins": 2.7048585414886475, + "rewards_train/rejected": -5.997897624969482, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -110.18942260742188, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -118.12229919433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3689422607421875, + "rewards_train/margins": -0.10671234130859375, + "rewards_train/rejected": -3.2622299194335938, + "step": 1954 + }, + { + "epoch": 0.55, + "logps_train/chosen": -5.152209281921387, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -38.883888244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05897092819213867, + "rewards_train/margins": 0.15441790223121643, + "rewards_train/rejected": -0.2133888304233551, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -66.7745361328125, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -96.94088745117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2725463807582855, + "rewards_train/margins": 1.5666351020336151, + "rewards_train/rejected": -1.2940887212753296, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -24.149127960205078, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -25.074073791503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7774128317832947, + "rewards_train/margins": 0.7674946188926697, + "rewards_train/rejected": -1.5449074506759644, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -77.39042663574219, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -42.04118347167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4140427112579346, + "rewards_train/margins": 0.40257561206817627, + "rewards_train/rejected": -1.8166183233261108, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -9.048686981201172, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -18.935890197753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6142436861991882, + "rewards_train/margins": 1.0559079051017761, + "rewards_train/rejected": -1.6701515913009644, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -296.42584228515625, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -290.5608825683594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.742584228515625, + "rewards_train/margins": -0.9864959716796875, + "rewards_train/rejected": -10.756088256835938, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -9.433670043945312, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -6.90518045425415, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5308670401573181, + "rewards_train/margins": -0.23409900069236755, + "rewards_train/rejected": -0.29676803946495056, + "step": 1955 + }, + { + "epoch": 0.55, + "logps_train/chosen": -27.378843307495117, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -37.653865814208984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2347593307495117, + "rewards_train/margins": -0.4693727493286133, + "rewards_train/rejected": -1.7653865814208984, + "step": 1955 + }, + { + "epoch": 0.55, + "learning_rate": 2.479240289522555e-07, + "loss": 0.5392, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -52.56348419189453, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -30.0, + "logps_train/rejected": -79.0792236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8563485145568848, + "rewards_train/margins": 1.0515737533569336, + "rewards_train/rejected": -4.907922267913818, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -37.88558578491211, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -23.550079345703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.526058554649353, + "rewards_train/margins": 0.1570744514465332, + "rewards_train/rejected": -1.6831330060958862, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -0.059243667870759964, + "logps_train/ref_chosen": -0.318359375, + "logps_train/ref_rejected": -0.318359375, + "logps_train/rejected": -0.05832117050886154, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.025911569595336914, + "rewards_train/margins": -9.225122630596161e-05, + "rewards_train/rejected": 0.026003820821642876, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -174.23519897460938, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -200.24180603027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.723519802093506, + "rewards_train/margins": 0.6006608009338379, + "rewards_train/rejected": -6.324180603027344, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.397567749023438, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -9.700865745544434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.177256777882576, + "rewards_train/margins": 0.21782980859279633, + "rewards_train/rejected": -0.3950865864753723, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -190.14244079589844, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -188.05792236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.814244270324707, + "rewards_train/margins": 0.9915480613708496, + "rewards_train/rejected": -5.805792331695557, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -26.022045135498047, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -40.74057388305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9772045016288757, + "rewards_train/margins": 1.2218528389930725, + "rewards_train/rejected": -2.1990573406219482, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -20.981584548950195, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -33.79270935058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5731585025787354, + "rewards_train/margins": 0.7561125755310059, + "rewards_train/rejected": -2.329271078109741, + "step": 1956 + }, + { + "epoch": 0.55, + "logps_train/chosen": -3.8067269325256348, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -0.84765625, + "logps_train/rejected": -0.439214289188385, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07870230823755264, + "rewards_train/margins": 0.037858109921216965, + "rewards_train/rejected": 0.04084419831633568, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -129.13497924804688, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -164.74822998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3134979009628296, + "rewards_train/margins": 4.261325001716614, + "rewards_train/rejected": -5.574822902679443, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -33.182064056396484, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -37.74858093261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7744563817977905, + "rewards_train/margins": 0.9316517114639282, + "rewards_train/rejected": -2.7061080932617188, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -142.20555114746094, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -165.48960876464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6705551147460938, + "rewards_train/margins": 3.42840576171875, + "rewards_train/rejected": -6.098960876464844, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -7.029959201812744, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -33.68366241455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11549592018127441, + "rewards_train/margins": 2.649745464324951, + "rewards_train/rejected": -2.7652413845062256, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -77.06466674804688, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -298.1637878417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0814666748046875, + "rewards_train/margins": 13.33491325378418, + "rewards_train/rejected": -16.416379928588867, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -35.57318115234375, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -103.69039916992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4823181629180908, + "rewards_train/margins": 3.8117220401763916, + "rewards_train/rejected": -5.294040203094482, + "step": 1957 + }, + { + "epoch": 0.55, + "logps_train/chosen": -77.98417663574219, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -118.111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4515823423862457, + "rewards_train/margins": 3.712715297937393, + "rewards_train/rejected": -3.2611329555511475, + "step": 1957 + }, + { + "epoch": 0.55, + "learning_rate": 2.461830420917678e-07, + "loss": 0.2978, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -91.64877319335938, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -162.93569946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9648773670196533, + "rewards_train/margins": 2.928692579269409, + "rewards_train/rejected": -4.8935699462890625, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -71.66705322265625, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -75.60816955566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.41670534014701843, + "rewards_train/margins": -0.0058883726596832275, + "rewards_train/rejected": -0.4108169674873352, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -42.24243927001953, + "logps_train/ref_chosen": -28.25, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -46.26174545288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.399243950843811, + "rewards_train/margins": 2.3519307374954224, + "rewards_train/rejected": -3.7511746883392334, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -5.238089561462402, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -40.77784729003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1503714621067047, + "rewards_train/margins": 2.6024132668972015, + "rewards_train/rejected": -2.7527847290039062, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -16.125892639160156, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -43.8140754699707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1374107450246811, + "rewards_train/margins": 1.2938183397054672, + "rewards_train/rejected": -1.1564075946807861, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -101.47554779052734, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -102.04180908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0975548028945923, + "rewards_train/margins": 0.056626081466674805, + "rewards_train/rejected": -1.154180884361267, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -124.25799560546875, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -137.02587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8757996559143066, + "rewards_train/margins": 1.5767884254455566, + "rewards_train/rejected": -4.452588081359863, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -83.72753143310547, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -90.97823333740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.522753119468689, + "rewards_train/margins": 2.150070309638977, + "rewards_train/rejected": -3.672823429107666, + "step": 1958 + }, + { + "epoch": 0.55, + "logps_train/chosen": -80.53622436523438, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -112.07318878173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9536224603652954, + "rewards_train/margins": 1.203696370124817, + "rewards_train/rejected": -2.1573188304901123, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -111.45452880859375, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -263.74810791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.045452833175659, + "rewards_train/margins": 12.329358339309692, + "rewards_train/rejected": -14.374811172485352, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -15.682334899902344, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -26.125, + "logps_train/rejected": -35.36023712158203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4455772638320923, + "rewards_train/margins": -0.5220535397529602, + "rewards_train/rejected": -0.9235237240791321, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -170.17820739746094, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -127.0684814453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.717820644378662, + "rewards_train/margins": -0.3109722137451172, + "rewards_train/rejected": -4.406848430633545, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -134.92599487304688, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -122.06941986083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.642599582672119, + "rewards_train/margins": 0.5643424987792969, + "rewards_train/rejected": -3.206942081451416, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -217.0234832763672, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -248.28076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.102348327636719, + "rewards_train/margins": 4.525728225708008, + "rewards_train/rejected": -9.628076553344727, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -30.57833480834961, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -34.140037536621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5890835523605347, + "rewards_train/margins": -7.975101470947266e-05, + "rewards_train/rejected": -1.5890038013458252, + "step": 1959 + }, + { + "epoch": 0.55, + "logps_train/chosen": -45.98747634887695, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -55.25, + "logps_train/rejected": -92.20633697509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.661247730255127, + "rewards_train/margins": 1.0343859195709229, + "rewards_train/rejected": -3.69563364982605, + "step": 1959 + }, + { + "epoch": 0.55, + "learning_rate": 2.444473311507582e-07, + "loss": 0.3555, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -272.3827209472656, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -264.54547119140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.638272285461426, + "rewards_train/margins": -0.3837251663208008, + "rewards_train/rejected": -11.254547119140625, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -14.904834747314453, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -35.323734283447266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5154834985733032, + "rewards_train/margins": 2.166890025138855, + "rewards_train/rejected": -2.682373523712158, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -241.14492797851562, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -247.62046813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.414493083953857, + "rewards_train/margins": 2.3475537300109863, + "rewards_train/rejected": -7.762046813964844, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -25.022380828857422, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -46.015533447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0647380352020264, + "rewards_train/margins": 1.9055652618408203, + "rewards_train/rejected": -3.9703032970428467, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -145.23345947265625, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -183.0836944580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22334595024585724, + "rewards_train/margins": 2.6850235909223557, + "rewards_train/rejected": -2.908369541168213, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -184.49749755859375, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -266.70819091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2497498989105225, + "rewards_train/margins": 10.121069192886353, + "rewards_train/rejected": -12.370819091796875, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -23.030115127563477, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -37.25, + "logps_train/rejected": -66.24845123291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20301151275634766, + "rewards_train/margins": 2.696833610534668, + "rewards_train/rejected": -2.8998451232910156, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -132.26718139648438, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -195.08334350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6267181634902954, + "rewards_train/margins": 4.2816160917282104, + "rewards_train/rejected": -5.908334255218506, + "step": 1960 + }, + { + "epoch": 0.55, + "logps_train/chosen": -31.89386558532715, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -58.27103805541992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6643866300582886, + "rewards_train/margins": 2.5377174615859985, + "rewards_train/rejected": -4.202104091644287, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -94.81863403320312, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -128.14825439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.231863498687744, + "rewards_train/margins": 3.282961845397949, + "rewards_train/rejected": -5.514825344085693, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -72.27310180664062, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -30.771703720092773, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.32731032371521, + "rewards_train/margins": -0.9813899993896484, + "rewards_train/rejected": -2.3459203243255615, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -5.691551208496094, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -17.75257110595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2535301148891449, + "rewards_train/margins": 0.4717269837856293, + "rewards_train/rejected": -0.7252570986747742, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -162.973876953125, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -187.18270874023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.6973876953125, + "rewards_train/margins": 4.470883369445801, + "rewards_train/rejected": -9.1682710647583, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -188.315673828125, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -185.80020141601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.181567668914795, + "rewards_train/margins": 0.7984523773193359, + "rewards_train/rejected": -7.980020046234131, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -69.42514038085938, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -59.712005615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3675140142440796, + "rewards_train/margins": 2.2161866426467896, + "rewards_train/rejected": -3.583700656890869, + "step": 1961 + }, + { + "epoch": 0.55, + "logps_train/chosen": -3.2243525981903076, + "logps_train/ref_chosen": -1.15625, + "logps_train/ref_rejected": -1.2734375, + "logps_train/rejected": -4.355232238769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20681026577949524, + "rewards_train/margins": 0.10136920213699341, + "rewards_train/rejected": -0.30817946791648865, + "step": 1961 + }, + { + "epoch": 0.55, + "learning_rate": 2.427169082773627e-07, + "loss": 0.2756, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -70.23013305664062, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -123.50227355957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5230133533477783, + "rewards_train/margins": 0.8272140026092529, + "rewards_train/rejected": -2.3502273559570312, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -53.60472869873047, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -50.485599517822266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.4979729652404785, + "rewards_train/margins": -0.2431631088256836, + "rewards_train/rejected": -4.254809856414795, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -5.32142972946167, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -4.681387901306152, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.39073672890663147, + "rewards_train/margins": -0.1272854208946228, + "rewards_train/rejected": -0.26345130801200867, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -116.61410522460938, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -113.32159423828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2114105224609375, + "rewards_train/margins": -0.9792511463165283, + "rewards_train/rejected": -2.232159376144409, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -15.027033805847168, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -77.1509017944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1292659044265747, + "rewards_train/margins": 4.248324275016785, + "rewards_train/rejected": -5.377590179443359, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -64.01049041748047, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -83.98709106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34895095229148865, + "rewards_train/margins": 1.1976600587368011, + "rewards_train/rejected": -0.8487091064453125, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -24.25326156616211, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -58.28588104248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7378262281417847, + "rewards_train/margins": 1.7782620191574097, + "rewards_train/rejected": -3.5160882472991943, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -161.79580688476562, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -167.90142822265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7295806407928467, + "rewards_train/margins": -0.4394378662109375, + "rewards_train/rejected": -3.290142774581909, + "step": 1962 + }, + { + "epoch": 0.55, + "logps_train/chosen": -183.2010498046875, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -188.0, + "logps_train/rejected": -232.9849853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5201051235198975, + "rewards_train/margins": 0.9783933162689209, + "rewards_train/rejected": -4.498498439788818, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -95.84015655517578, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -97.21388244628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5840156674385071, + "rewards_train/margins": 0.13737261295318604, + "rewards_train/rejected": -0.7213882803916931, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -97.10057067871094, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -120.65170288085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0600571632385254, + "rewards_train/margins": 2.1051130294799805, + "rewards_train/rejected": -4.165170192718506, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -240.51690673828125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -234.0788116455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.751690864562988, + "rewards_train/margins": 0.1561908721923828, + "rewards_train/rejected": -9.907881736755371, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -164.32461547851562, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -179.62997436523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.032461643218994, + "rewards_train/margins": 1.1305360794067383, + "rewards_train/rejected": -4.162997722625732, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -31.417747497558594, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -75.98646545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3667747974395752, + "rewards_train/margins": 3.406872034072876, + "rewards_train/rejected": -4.773646831512451, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -77.39599609375, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -78.07148742675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.814599633216858, + "rewards_train/margins": 0.2925492525100708, + "rewards_train/rejected": -2.1071488857269287, + "step": 1963 + }, + { + "epoch": 0.55, + "logps_train/chosen": -289.7572937011719, + "logps_train/ref_chosen": -233.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -236.9512939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.675729274749756, + "rewards_train/margins": 4.619400501251221, + "rewards_train/rejected": -10.295129776000977, + "step": 1963 + }, + { + "epoch": 0.55, + "learning_rate": 2.4099178558270674e-07, + "loss": 0.4482, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.907068252563477, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -23.715438842773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5235193967819214, + "rewards_train/margins": 0.4527120590209961, + "rewards_train/rejected": -1.9762314558029175, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -57.32876205444336, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -99.908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08287620544433594, + "rewards_train/margins": 2.4079442024230957, + "rewards_train/rejected": -2.4908204078674316, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -132.78231811523438, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -147.0230712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8282318115234375, + "rewards_train/margins": 4.624075412750244, + "rewards_train/rejected": -5.452307224273682, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -155.259033203125, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -210.68307495117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5259034633636475, + "rewards_train/margins": 4.542403936386108, + "rewards_train/rejected": -7.068307399749756, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -26.299272537231445, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -37.44932556152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1893022060394287, + "rewards_train/margins": -0.594369649887085, + "rewards_train/rejected": -1.5949325561523438, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -23.904098510742188, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -18.275543212890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.137284994125366, + "rewards_train/margins": -0.7409806251525879, + "rewards_train/rejected": -1.3963043689727783, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -136.18861389160156, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -290.87103271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.4688615798950195, + "rewards_train/margins": 11.168242454528809, + "rewards_train/rejected": -16.637104034423828, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -126.79743957519531, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -123.07362365722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4797439575195312, + "rewards_train/margins": 0.8276183605194092, + "rewards_train/rejected": -3.3073623180389404, + "step": 1964 + }, + { + "epoch": 0.55, + "logps_train/chosen": -25.66331672668457, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -19.536373138427734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3163317441940308, + "rewards_train/margins": -0.24394440650939941, + "rewards_train/rejected": -1.0723873376846313, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -62.08409118652344, + "logps_train/ref_chosen": -44.5, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -67.90782165527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7584091424942017, + "rewards_train/margins": 1.407373070716858, + "rewards_train/rejected": -3.1657822132110596, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -82.43548583984375, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -85.17511749267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.043548583984375, + "rewards_train/margins": 0.023963168263435364, + "rewards_train/rejected": -0.06751175224781036, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -266.7479248046875, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -322.5768127441406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.674792289733887, + "rewards_train/margins": -0.9171104431152344, + "rewards_train/rejected": -12.757681846618652, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -153.58041381835938, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -187.16107177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.2580413818359375, + "rewards_train/margins": 0.05806589126586914, + "rewards_train/rejected": -4.316107273101807, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -41.77446746826172, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -55.87187576293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5836968421936035, + "rewards_train/margins": 1.972240924835205, + "rewards_train/rejected": -4.555937767028809, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -96.40006256103516, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -126.74845123291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2400062084198, + "rewards_train/margins": 2.7348392009735107, + "rewards_train/rejected": -4.9748454093933105, + "step": 1965 + }, + { + "epoch": 0.55, + "logps_train/chosen": -32.37348175048828, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -32.87514114379883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.043598175048828, + "rewards_train/margins": 0.9532909393310547, + "rewards_train/rejected": -2.996889114379883, + "step": 1965 + }, + { + "epoch": 0.55, + "learning_rate": 2.392719751408199e-07, + "loss": 0.4555, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -160.59402465820312, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -231.0, + "logps_train/rejected": -311.93243408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.3094024658203125, + "rewards_train/margins": 2.783841133117676, + "rewards_train/rejected": -8.093243598937988, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -109.74909973144531, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -216.54754638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1249099969863892, + "rewards_train/margins": 7.029844641685486, + "rewards_train/rejected": -8.154754638671875, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -149.5734405517578, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -220.610107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.0073442459106445, + "rewards_train/margins": 5.6536664962768555, + "rewards_train/rejected": -11.6610107421875, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -247.76419067382812, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -217.44735717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.276419162750244, + "rewards_train/margins": 0.9683165550231934, + "rewards_train/rejected": -8.244735717773438, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -122.59686279296875, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -203.64291381835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.059686303138733, + "rewards_train/margins": 8.454605460166931, + "rewards_train/rejected": -9.514291763305664, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -172.70240783691406, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -299.9255065917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.170240879058838, + "rewards_train/margins": 9.92231035232544, + "rewards_train/rejected": -14.092551231384277, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -128.35336303710938, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -203.2352294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4853363037109375, + "rewards_train/margins": 7.738186836242676, + "rewards_train/rejected": -8.223523139953613, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -147.13380432128906, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -124.86194610595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5133804678916931, + "rewards_train/margins": 2.172814190387726, + "rewards_train/rejected": -2.686194658279419, + "step": 1966 + }, + { + "epoch": 0.55, + "logps_train/chosen": -126.05363464355469, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -154.16983032226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4053635597229004, + "rewards_train/margins": 2.0116195678710938, + "rewards_train/rejected": -4.416983127593994, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -15.288774490356445, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -23.90951156616211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5726274847984314, + "rewards_train/margins": 0.47457367181777954, + "rewards_train/rejected": -1.047201156616211, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -25.76468849182129, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -1.296875, + "logps_train/rejected": -22.240882873535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.982718825340271, + "rewards_train/margins": 0.11168205738067627, + "rewards_train/rejected": -2.0944008827209473, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -163.2709503173828, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -190.47108459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1270949840545654, + "rewards_train/margins": 7.8200132846832275, + "rewards_train/rejected": -10.947108268737793, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -11.724611282348633, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -41.244537353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7318361401557922, + "rewards_train/margins": 2.3051175475120544, + "rewards_train/rejected": -3.0369536876678467, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -170.07460021972656, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -192.63145446777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9074599742889404, + "rewards_train/margins": 4.455685377120972, + "rewards_train/rejected": -7.363145351409912, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -5.9596943855285645, + "logps_train/ref_chosen": -0.79296875, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -17.46242904663086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5166725516319275, + "rewards_train/margins": -0.17667964100837708, + "rewards_train/rejected": -0.3399929106235504, + "step": 1967 + }, + { + "epoch": 0.55, + "logps_train/chosen": -10.320960998535156, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -33.235137939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6039711236953735, + "rewards_train/margins": 2.057042717933655, + "rewards_train/rejected": -2.6610138416290283, + "step": 1967 + }, + { + "epoch": 0.55, + "learning_rate": 2.37557488988552e-07, + "loss": 0.1722, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -2.333407163619995, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -1.046875, + "logps_train/rejected": -0.7209370732307434, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.005721783731132746, + "rewards_train/margins": -0.026872010435909033, + "rewards_train/rejected": 0.03259379416704178, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -333.005859375, + "logps_train/ref_chosen": -226.0, + "logps_train/ref_rejected": -241.0, + "logps_train/rejected": -358.8609619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.700586318969727, + "rewards_train/margins": 1.08551025390625, + "rewards_train/rejected": -11.786096572875977, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -102.76148223876953, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -209.11279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6761482954025269, + "rewards_train/margins": 9.635131001472473, + "rewards_train/rejected": -11.311279296875, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -146.5917510986328, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -212.32757568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.109175205230713, + "rewards_train/margins": 6.673582553863525, + "rewards_train/rejected": -9.782757759094238, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -48.941680908203125, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -105.48856353759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3691680431365967, + "rewards_train/margins": -0.22031164169311523, + "rewards_train/rejected": -3.1488564014434814, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -0.023608289659023285, + "logps_train/ref_chosen": -0.053955078125, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -17.110736846923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0030346789862960577, + "rewards_train/margins": 0.9422333517577499, + "rewards_train/rejected": -0.9391986727714539, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -152.67758178710938, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -193.7150421142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.617758274078369, + "rewards_train/margins": 3.9037461280822754, + "rewards_train/rejected": -9.521504402160645, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -121.22552490234375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -114.78446960449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6725525259971619, + "rewards_train/margins": 1.355894386768341, + "rewards_train/rejected": -2.028446912765503, + "step": 1968 + }, + { + "epoch": 0.55, + "logps_train/chosen": -52.773231506347656, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -33.429744720458984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.602323293685913, + "rewards_train/margins": -0.43434882164001465, + "rewards_train/rejected": -2.1679744720458984, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -42.12818145751953, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -74.11781311035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43781813979148865, + "rewards_train/margins": 0.27396318316459656, + "rewards_train/rejected": -0.7117813229560852, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -208.57952880859375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -307.5477294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.057952880859375, + "rewards_train/margins": 7.896820068359375, + "rewards_train/rejected": -14.95477294921875, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.857574462890625, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -54.372589111328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5670074224472046, + "rewards_train/margins": -0.12974846363067627, + "rewards_train/rejected": -1.4372589588165283, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -148.57000732421875, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -191.4615936279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.507000923156738, + "rewards_train/margins": 3.5891590118408203, + "rewards_train/rejected": -8.096159934997559, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -4.6344146728515625, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -10.027780532836914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2525039613246918, + "rewards_train/margins": 0.3377741277217865, + "rewards_train/rejected": -0.5902780890464783, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -11.088492393493652, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -31.96877098083496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8213492631912231, + "rewards_train/margins": 1.4880279302597046, + "rewards_train/rejected": -2.3093771934509277, + "step": 1969 + }, + { + "epoch": 0.55, + "logps_train/chosen": -110.8270263671875, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -231.94708251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.782702624797821, + "rewards_train/margins": 4.612005531787872, + "rewards_train/rejected": -5.394708156585693, + "step": 1969 + }, + { + "epoch": 0.55, + "learning_rate": 2.3584833912548885e-07, + "loss": 0.3389, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -63.059303283691406, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -59.84757614135742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29406967759132385, + "rewards_train/margins": 0.27882729191333055, + "rewards_train/rejected": 0.015242385677993298, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -157.7477569580078, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -158.29525756835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1747758388519287, + "rewards_train/margins": 0.5547499656677246, + "rewards_train/rejected": -3.7295258045196533, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -104.53750610351562, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -167.88751220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3537505865097046, + "rewards_train/margins": 3.735000729560852, + "rewards_train/rejected": -5.088751316070557, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -104.8210220336914, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -156.9180145263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8821022510528564, + "rewards_train/margins": 0.6096992492675781, + "rewards_train/rejected": -3.4918015003204346, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -94.0464096069336, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -77.47994995117188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.504641056060791, + "rewards_train/margins": -0.30664610862731934, + "rewards_train/rejected": -2.1979949474334717, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -131.05734252929688, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -266.1693115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.305734157562256, + "rewards_train/margins": 1.8111968040466309, + "rewards_train/rejected": -8.116930961608887, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -72.22172546386719, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -150.2239990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6971725225448608, + "rewards_train/margins": 6.225227475166321, + "rewards_train/rejected": -7.922399997711182, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -140.88389587402344, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -259.8287658691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8383896350860596, + "rewards_train/margins": 8.644487142562866, + "rewards_train/rejected": -11.482876777648926, + "step": 1970 + }, + { + "epoch": 0.55, + "logps_train/chosen": -154.94955444335938, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -235.63177490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.994955539703369, + "rewards_train/margins": 3.468222141265869, + "rewards_train/rejected": -10.463177680969238, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -113.6376724243164, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -150.77877807617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6137672662734985, + "rewards_train/margins": 1.164110541343689, + "rewards_train/rejected": -1.7778778076171875, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -77.37210845947266, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -69.22274780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.737210988998413, + "rewards_train/margins": 0.11006379127502441, + "rewards_train/rejected": -2.8472747802734375, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -64.44754028320312, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -53.29465866088867, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.026004314422607, + "rewards_train/margins": -0.4965381622314453, + "rewards_train/rejected": -4.529466152191162, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -122.99647521972656, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -39.15589904785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8996474742889404, + "rewards_train/margins": 0.16594243049621582, + "rewards_train/rejected": -3.0655899047851562, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -37.26076889038086, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -46.43268966674805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8885769248008728, + "rewards_train/margins": 0.22969204187393188, + "rewards_train/rejected": -1.1182689666748047, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -82.57685852050781, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -139.08804321289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.507685899734497, + "rewards_train/margins": 2.0511183738708496, + "rewards_train/rejected": -3.5588042736053467, + "step": 1971 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.543869018554688, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -28.40755271911621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4668869078159332, + "rewards_train/margins": 1.4926184117794037, + "rewards_train/rejected": -1.959505319595337, + "step": 1971 + }, + { + "epoch": 0.55, + "learning_rate": 2.34144537513867e-07, + "loss": 0.3702, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -97.36293029785156, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -34.489532470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.186293125152588, + "rewards_train/margins": 0.6657850742340088, + "rewards_train/rejected": -2.8520781993865967, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -130.6260223388672, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -35.113739013671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.862602233886719, + "rewards_train/margins": -2.2199783325195312, + "rewards_train/rejected": -2.6426239013671875, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -195.39187622070312, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -229.67816162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.3391876220703125, + "rewards_train/margins": 0.9286284446716309, + "rewards_train/rejected": -6.267816066741943, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -149.76950073242188, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -220.36880493164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.876950263977051, + "rewards_train/margins": -2.040069818496704, + "rewards_train/rejected": -3.8368804454803467, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -63.77926254272461, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -64.28702545166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2529262602329254, + "rewards_train/margins": 0.05077630281448364, + "rewards_train/rejected": -0.30370256304740906, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -45.871307373046875, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -43.637332916259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.755880832672119, + "rewards_train/margins": -0.7608973979949951, + "rewards_train/rejected": -2.994983434677124, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -33.82684326171875, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -42.908870697021484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.282684326171875, + "rewards_train/margins": 1.7269527912139893, + "rewards_train/rejected": -3.0096371173858643, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -156.892578125, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -232.84701538085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.439258098602295, + "rewards_train/margins": 3.245443820953369, + "rewards_train/rejected": -9.684701919555664, + "step": 1972 + }, + { + "epoch": 0.55, + "logps_train/chosen": -8.498008728027344, + "logps_train/ref_chosen": -3.609375, + "logps_train/ref_rejected": -27.625, + "logps_train/rejected": -49.14944839477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48886337876319885, + "rewards_train/margins": 1.6635814607143402, + "rewards_train/rejected": -2.152444839477539, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -88.89978790283203, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -158.60687255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7899787425994873, + "rewards_train/margins": 2.5207087993621826, + "rewards_train/rejected": -5.31068754196167, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -3.751800775527954, + "logps_train/ref_chosen": -0.365234375, + "logps_train/ref_rejected": -0.365234375, + "logps_train/rejected": -4.144972801208496, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33865663409233093, + "rewards_train/margins": 0.03931722044944763, + "rewards_train/rejected": -0.37797385454177856, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -216.37051391601562, + "logps_train/ref_chosen": -196.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -301.65106201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0370514392852783, + "rewards_train/margins": 7.428054571151733, + "rewards_train/rejected": -9.465106010437012, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -20.454326629638672, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -1.3359375, + "logps_train/rejected": -21.03801155090332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9118388891220093, + "rewards_train/margins": 0.058368563652038574, + "rewards_train/rejected": -1.9702074527740479, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -147.11965942382812, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -104.73521423339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1119660139083862, + "rewards_train/margins": 0.16155540943145752, + "rewards_train/rejected": -1.2735214233398438, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -44.882423400878906, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -35.50769805908203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0632424354553223, + "rewards_train/margins": -0.3312225341796875, + "rewards_train/rejected": -2.7320199012756348, + "step": 1973 + }, + { + "epoch": 0.55, + "logps_train/chosen": -7.406013011932373, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -6.5048065185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13435129821300507, + "rewards_train/margins": 0.1067543625831604, + "rewards_train/rejected": -0.24110566079616547, + "step": 1973 + }, + { + "epoch": 0.55, + "learning_rate": 2.3244609607849096e-07, + "loss": 0.6844, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -102.93285369873047, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -127.83842468261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1932854652404785, + "rewards_train/margins": 3.7405571937561035, + "rewards_train/rejected": -5.933842658996582, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -78.7040023803711, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -78.74467468261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9204002618789673, + "rewards_train/margins": 0.00406724214553833, + "rewards_train/rejected": -0.9244675040245056, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -160.19935607910156, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -154.8472442626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.31993567943573, + "rewards_train/margins": 2.3147886991500854, + "rewards_train/rejected": -3.6347243785858154, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -79.04977416992188, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -108.09780883789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0549774169921875, + "rewards_train/margins": 1.604803442955017, + "rewards_train/rejected": -1.6597808599472046, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -206.20999145507812, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -215.695068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.520999431610107, + "rewards_train/margins": 0.7485074996948242, + "rewards_train/rejected": -7.269506931304932, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -186.37350463867188, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -281.1742248535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.087350368499756, + "rewards_train/margins": 8.530072689056396, + "rewards_train/rejected": -14.617423057556152, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -54.86971664428711, + "logps_train/ref_chosen": -29.875, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -77.01245880126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.499471664428711, + "rewards_train/margins": -0.6732257604598999, + "rewards_train/rejected": -1.826245903968811, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -11.83930492401123, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -2.28125, + "logps_train/rejected": -17.620521545410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27768048644065857, + "rewards_train/margins": 1.2562467157840729, + "rewards_train/rejected": -1.5339272022247314, + "step": 1974 + }, + { + "epoch": 0.55, + "logps_train/chosen": -20.086849212646484, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -49.46070861816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.20381508767604828, + "rewards_train/margins": 2.924886092543602, + "rewards_train/rejected": -2.7210710048675537, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -21.288766860961914, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -18.850318908691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6413767337799072, + "rewards_train/margins": -0.5500948429107666, + "rewards_train/rejected": -1.0912818908691406, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -226.43511962890625, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -302.6029052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.543511867523193, + "rewards_train/margins": 8.216779232025146, + "rewards_train/rejected": -14.76029109954834, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -126.32251739501953, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -198.62594604492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5322518348693848, + "rewards_train/margins": 7.230343341827393, + "rewards_train/rejected": -9.762595176696777, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -130.46385192871094, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -54.914241790771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.446385145187378, + "rewards_train/margins": 0.28253912925720215, + "rewards_train/rejected": -3.72892427444458, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -135.48223876953125, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -237.61209106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.498223900794983, + "rewards_train/margins": 9.16298520565033, + "rewards_train/rejected": -10.661209106445312, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -88.84412384033203, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -58.25, + "logps_train/rejected": -139.4906005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.659412384033203, + "rewards_train/margins": 3.4646482467651367, + "rewards_train/rejected": -8.12406063079834, + "step": 1975 + }, + { + "epoch": 0.55, + "logps_train/chosen": -168.65737915039062, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -286.2976989746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.96573805809021, + "rewards_train/margins": 9.86403203010559, + "rewards_train/rejected": -12.8297700881958, + "step": 1975 + }, + { + "epoch": 0.55, + "learning_rate": 2.3075302670665065e-07, + "loss": 0.2726, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -11.058456420898438, + "logps_train/ref_chosen": -1.859375, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -25.97271728515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9199081659317017, + "rewards_train/margins": -0.31013643741607666, + "rewards_train/rejected": -0.609771728515625, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -118.27754211425781, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -68.01460266113281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.927754282951355, + "rewards_train/margins": -1.1512939929962158, + "rewards_train/rejected": -0.7764602899551392, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -139.77206420898438, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -139.5814971923828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5272064208984375, + "rewards_train/margins": -0.01905655860900879, + "rewards_train/rejected": -2.5081498622894287, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -285.7584228515625, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -233.70840454101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.275842666625977, + "rewards_train/margins": -0.15500164031982422, + "rewards_train/rejected": -11.120841026306152, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -12.816709518432617, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -39.794151306152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7535459399223328, + "rewards_train/margins": -0.024130821228027344, + "rewards_train/rejected": -0.7294151186943054, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -14.890542030334473, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -31.418371200561523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21094579994678497, + "rewards_train/margins": 1.7402829676866531, + "rewards_train/rejected": -1.5293371677398682, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -42.17909240722656, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -88.32208251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0304092168807983, + "rewards_train/margins": 2.9017990827560425, + "rewards_train/rejected": -3.932208299636841, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -13.12653636932373, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -22.46510124206543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.387653648853302, + "rewards_train/margins": 1.0588565468788147, + "rewards_train/rejected": -1.4465101957321167, + "step": 1976 + }, + { + "epoch": 0.55, + "logps_train/chosen": -21.001148223876953, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -34.49233627319336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5626148581504822, + "rewards_train/margins": 1.0991187691688538, + "rewards_train/rejected": -1.661733627319336, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -115.93053436279297, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -131.61920166015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4430534839630127, + "rewards_train/margins": -0.031133174896240234, + "rewards_train/rejected": -3.4119203090667725, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -210.60333251953125, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -253.33583068847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.960333228111267, + "rewards_train/margins": 5.473250031471252, + "rewards_train/rejected": -7.4335832595825195, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -30.65421485900879, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -44.03500747680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0341715812683105, + "rewards_train/margins": 1.550579309463501, + "rewards_train/rejected": -3.5847508907318115, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -33.43131637573242, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -39.454246520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9337565898895264, + "rewards_train/margins": 0.2897930145263672, + "rewards_train/rejected": -3.2235496044158936, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -53.897239685058594, + "logps_train/ref_chosen": -43.75, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -140.7534637451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0147240161895752, + "rewards_train/margins": 5.060622453689575, + "rewards_train/rejected": -6.07534646987915, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -219.00582885742188, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -227.00050354003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.000582695007324, + "rewards_train/margins": -0.2005324363708496, + "rewards_train/rejected": -7.800050258636475, + "step": 1977 + }, + { + "epoch": 0.55, + "logps_train/chosen": -53.25284957885742, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -44.80344009399414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.981534957885742, + "rewards_train/margins": -0.4824409484863281, + "rewards_train/rejected": -3.499094009399414, + "step": 1977 + }, + { + "epoch": 0.55, + "learning_rate": 2.2906534124803622e-07, + "loss": 0.5313, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -56.308998107910156, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -50.14013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5058998465538025, + "rewards_train/margins": 1.4331138730049133, + "rewards_train/rejected": -1.9390137195587158, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -276.1325378417969, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -273.1270446777344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.513254165649414, + "rewards_train/margins": -0.3005495071411133, + "rewards_train/rejected": -13.2127046585083, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -6.343496799468994, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -20.466047286987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31091219186782837, + "rewards_train/margins": 1.0825675129890442, + "rewards_train/rejected": -1.3934797048568726, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -19.893278121948242, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -32.22000503540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.7231721878051758, + "rewards_train/margins": 3.4482977390289307, + "rewards_train/rejected": -2.725125551223755, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -250.43307495117188, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -250.04693603515625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.14330768585205, + "rewards_train/margins": -0.9386138916015625, + "rewards_train/rejected": -8.204693794250488, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -40.91904830932617, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -104.10363006591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19190482795238495, + "rewards_train/margins": 5.51845808327198, + "rewards_train/rejected": -5.710362911224365, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -1.5249682664871216, + "logps_train/ref_chosen": -0.4375, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -6.2490010261535645, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10874682664871216, + "rewards_train/margins": 0.10677827894687653, + "rewards_train/rejected": -0.21552510559558868, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -134.28277587890625, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -252.77328491210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2282776832580566, + "rewards_train/margins": 7.649051189422607, + "rewards_train/rejected": -9.877328872680664, + "step": 1978 + }, + { + "epoch": 0.55, + "logps_train/chosen": -7.627218246459961, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -43.52866744995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48147183656692505, + "rewards_train/margins": 1.783894956111908, + "rewards_train/rejected": -2.265366792678833, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -14.459687232971191, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -19.803573608398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4834687411785126, + "rewards_train/margins": 1.229701191186905, + "rewards_train/rejected": -1.7131699323654175, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -109.00839233398438, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -170.29396057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3008391857147217, + "rewards_train/margins": 2.2285568714141846, + "rewards_train/rejected": -5.529396057128906, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -19.461406707763672, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -24.976566314697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.660203218460083, + "rewards_train/margins": 0.35307836532592773, + "rewards_train/rejected": -2.0132815837860107, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -113.55224609375, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -89.6470718383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2552247047424316, + "rewards_train/margins": 0.25948262214660645, + "rewards_train/rejected": -2.514707326889038, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -1.5185259580612183, + "logps_train/ref_chosen": -1.34375, + "logps_train/ref_rejected": -1.609375, + "logps_train/rejected": -1.4699164628982544, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.017477596178650856, + "rewards_train/margins": -0.03142345044761896, + "rewards_train/rejected": 0.013945854268968105, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -107.33241271972656, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -185.30322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.83324134349823, + "rewards_train/margins": 6.797081112861633, + "rewards_train/rejected": -8.630322456359863, + "step": 1979 + }, + { + "epoch": 0.55, + "logps_train/chosen": -166.93113708496094, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -204.963134765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.643113613128662, + "rewards_train/margins": 3.3532004356384277, + "rewards_train/rejected": -9.99631404876709, + "step": 1979 + }, + { + "epoch": 0.55, + "learning_rate": 2.2738305151465642e-07, + "loss": 0.3543, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -212.30870056152344, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -189.9973907470703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.830870151519775, + "rewards_train/margins": -0.18113088607788086, + "rewards_train/rejected": -6.6497392654418945, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -172.86297607421875, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -137.973388671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.286297798156738, + "rewards_train/margins": -0.588958740234375, + "rewards_train/rejected": -4.697339057922363, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -126.13976287841797, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -165.64869689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.813976287841797, + "rewards_train/margins": 3.600893497467041, + "rewards_train/rejected": -6.414869785308838, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -149.46649169921875, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -121.30733489990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.546649217605591, + "rewards_train/margins": 1.2840845584869385, + "rewards_train/rejected": -4.830733776092529, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -41.69828796386719, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -28.573699951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5448288321495056, + "rewards_train/margins": 1.4187912344932556, + "rewards_train/rejected": -1.9636200666427612, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -193.43289184570312, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -258.7412414550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.443289279937744, + "rewards_train/margins": 9.030834674835205, + "rewards_train/rejected": -12.47412395477295, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -33.722991943359375, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -38.702117919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.147299289703369, + "rewards_train/margins": 0.35416245460510254, + "rewards_train/rejected": -2.5014617443084717, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -85.89433288574219, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -112.24050903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4894332885742188, + "rewards_train/margins": 1.5346176624298096, + "rewards_train/rejected": -3.0240509510040283, + "step": 1980 + }, + { + "epoch": 0.55, + "logps_train/chosen": -191.7782745361328, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -221.6375732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.577827453613281, + "rewards_train/margins": 1.385930061340332, + "rewards_train/rejected": -6.963757514953613, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -156.22787475585938, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -194.4154052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.022787570953369, + "rewards_train/margins": 3.8187527656555176, + "rewards_train/rejected": -8.841540336608887, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -0.48056405782699585, + "logps_train/ref_chosen": -0.46875, + "logps_train/ref_rejected": -0.46875, + "logps_train/rejected": -0.47136154770851135, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.001181405852548778, + "rewards_train/margins": -0.0009202510700561106, + "rewards_train/rejected": -0.00026115478249266744, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.176633834838867, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -3.453125, + "logps_train/rejected": -51.505210876464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5926634073257446, + "rewards_train/margins": 3.2125452756881714, + "rewards_train/rejected": -4.805208683013916, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -208.6421661376953, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -195.59632873535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.8642168045043945, + "rewards_train/margins": -1.30458402633667, + "rewards_train/rejected": -6.559632778167725, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -105.34294891357422, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -125.43936157226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6842949390411377, + "rewards_train/margins": 2.509641408920288, + "rewards_train/rejected": -4.193936347961426, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -77.73020935058594, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -198.32656860351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17697906494140625, + "rewards_train/margins": 5.509635925292969, + "rewards_train/rejected": -5.3326568603515625, + "step": 1981 + }, + { + "epoch": 0.55, + "logps_train/chosen": -105.66493225097656, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -132.06387329101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6664931774139404, + "rewards_train/margins": 2.939894437789917, + "rewards_train/rejected": -5.606387615203857, + "step": 1981 + }, + { + "epoch": 0.55, + "learning_rate": 2.2570616928075593e-07, + "loss": 0.3557, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -101.71499633789062, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -101.32167053222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3714996576309204, + "rewards_train/margins": -0.03933262825012207, + "rewards_train/rejected": -1.3321670293807983, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -146.15090942382812, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -130.22911071777344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.115090847015381, + "rewards_train/margins": -0.3921794891357422, + "rewards_train/rejected": -4.722911357879639, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -175.85018920898438, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -195.7888641357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5850188732147217, + "rewards_train/margins": 3.9938676357269287, + "rewards_train/rejected": -6.57888650894165, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -14.053683280944824, + "logps_train/ref_chosen": -1.3203125, + "logps_train/ref_rejected": -1.46875, + "logps_train/rejected": -15.629154205322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2733371257781982, + "rewards_train/margins": 0.14270329475402832, + "rewards_train/rejected": -1.4160404205322266, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -29.497581481933594, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -34.54338836669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.712258219718933, + "rewards_train/margins": 0.5670806169509888, + "rewards_train/rejected": -2.279338836669922, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.55706024169922, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -19.079843521118164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6057060360908508, + "rewards_train/margins": 0.8772783875465393, + "rewards_train/rejected": -1.4829844236373901, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -110.44705200195312, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -113.96406555175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7447052001953125, + "rewards_train/margins": 3.0017013549804688, + "rewards_train/rejected": -4.746406555175781, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -88.80110168457031, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -137.4921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7301101684570312, + "rewards_train/margins": 4.519108772277832, + "rewards_train/rejected": -7.249218940734863, + "step": 1982 + }, + { + "epoch": 0.55, + "logps_train/chosen": -13.308935165405273, + "logps_train/ref_chosen": -1.84375, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -27.142070770263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.146518588066101, + "rewards_train/margins": 0.49893856048583984, + "rewards_train/rejected": -1.645457148551941, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -48.541622161865234, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -59.12542724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.435412406921387, + "rewards_train/margins": 0.9833803176879883, + "rewards_train/rejected": -5.418792724609375, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -5.228788375854492, + "logps_train/ref_chosen": -1.8828125, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -10.533231735229492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3345975875854492, + "rewards_train/margins": -0.07502439618110657, + "rewards_train/rejected": -0.25957319140434265, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -12.932783126831055, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -17.65880012512207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9260908365249634, + "rewards_train/margins": 0.5491641759872437, + "rewards_train/rejected": -1.475255012512207, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -56.57328796386719, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -72.86109924316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9573287963867188, + "rewards_train/margins": 2.428781032562256, + "rewards_train/rejected": -5.386109828948975, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -185.20362854003906, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -283.39447021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.720362901687622, + "rewards_train/margins": 8.119084119796753, + "rewards_train/rejected": -10.839447021484375, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -140.11354064941406, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -196.55593872070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.7113542556762695, + "rewards_train/margins": 2.7942395210266113, + "rewards_train/rejected": -7.505593776702881, + "step": 1983 + }, + { + "epoch": 0.55, + "logps_train/chosen": -10.564011573791504, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -14.474064826965332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4345261752605438, + "rewards_train/margins": 0.15663030743598938, + "rewards_train/rejected": -0.5911564826965332, + "step": 1983 + }, + { + "epoch": 0.55, + "learning_rate": 2.240347062827328e-07, + "loss": 0.3665, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -46.556522369384766, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -44.10783004760742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.168152332305908, + "rewards_train/margins": 0.017630815505981445, + "rewards_train/rejected": -3.1857831478118896, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -62.619117736816406, + "logps_train/ref_chosen": -29.0, + "logps_train/ref_rejected": -27.625, + "logps_train/rejected": -49.06884765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3619117736816406, + "rewards_train/margins": -1.217526912689209, + "rewards_train/rejected": -2.1443848609924316, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -21.486425399780273, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -23.008068084716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2861425876617432, + "rewards_train/margins": 0.40841424465179443, + "rewards_train/rejected": -1.6945568323135376, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -134.8750457763672, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -192.3029327392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.737504720687866, + "rewards_train/margins": 5.442788362503052, + "rewards_train/rejected": -9.180293083190918, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -51.712860107421875, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -84.29472351074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0712860822677612, + "rewards_train/margins": 1.1581863164901733, + "rewards_train/rejected": -2.2294723987579346, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -36.45697784423828, + "logps_train/ref_chosen": -3.734375, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -33.419960021972656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2722604274749756, + "rewards_train/margins": -0.9865143299102783, + "rewards_train/rejected": -2.2857460975646973, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -18.433006286621094, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -21.961563110351562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0964256525039673, + "rewards_train/margins": -0.42526930570602417, + "rewards_train/rejected": -0.6711563467979431, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -2.279484748840332, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -2.140625, + "logps_train/rejected": -8.53238582611084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13029222190380096, + "rewards_train/margins": 0.5088838487863541, + "rewards_train/rejected": -0.639176070690155, + "step": 1984 + }, + { + "epoch": 0.55, + "logps_train/chosen": -1.9784613847732544, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -17.01947593688965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.245903879404068, + "rewards_train/margins": 1.7087890207767487, + "rewards_train/rejected": -1.4628851413726807, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -158.81959533691406, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -191.94149780273438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.281959533691406, + "rewards_train/margins": -0.5878095626831055, + "rewards_train/rejected": -6.694149971008301, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -171.32223510742188, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -162.88763427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.532223701477051, + "rewards_train/margins": 0.6565399169921875, + "rewards_train/rejected": -6.188763618469238, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -128.60577392578125, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -172.2372589111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.560577392578125, + "rewards_train/margins": 4.263148784637451, + "rewards_train/rejected": -6.823726177215576, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -14.09605598449707, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -40.462093353271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04085559770464897, + "rewards_train/margins": 2.417853880673647, + "rewards_train/rejected": -2.458709478378296, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -6.805042266845703, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -22.506044387817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48597297072410583, + "rewards_train/margins": 0.027131468057632446, + "rewards_train/rejected": -0.5131044387817383, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -38.75600051879883, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -39.08415222167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.334975004196167, + "rewards_train/margins": 0.22500276565551758, + "rewards_train/rejected": -3.5599777698516846, + "step": 1985 + }, + { + "epoch": 0.55, + "logps_train/chosen": -17.706212997436523, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -15.0, + "logps_train/rejected": -14.64660358428955, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.23937129974365234, + "rewards_train/margins": -0.2747109420597553, + "rewards_train/rejected": 0.03533964231610298, + "step": 1985 + }, + { + "epoch": 0.56, + "learning_rate": 2.2236867421905548e-07, + "loss": 0.5919, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -0.7861604690551758, + "logps_train/ref_chosen": -0.2265625, + "logps_train/ref_rejected": -0.2265625, + "logps_train/rejected": -0.7788336873054504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.0559597983956337, + "rewards_train/margins": -0.0007326789200305939, + "rewards_train/rejected": -0.055227119475603104, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -80.016357421875, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -51.084983825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7016358375549316, + "rewards_train/margins": 0.2068626880645752, + "rewards_train/rejected": -2.908498525619507, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -29.2752685546875, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -37.88549041748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.771276831626892, + "rewards_train/margins": 0.7235223054885864, + "rewards_train/rejected": -2.4947991371154785, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -18.298648834228516, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -23.623014450073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3454898595809937, + "rewards_train/margins": 0.5980615615844727, + "rewards_train/rejected": -1.9435514211654663, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -105.0811767578125, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -92.12135314941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.25811767578125, + "rewards_train/margins": 0.40401768684387207, + "rewards_train/rejected": -1.662135362625122, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -15.863667488098145, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -23.309913635253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2832417488098145, + "rewards_train/margins": 0.37899959087371826, + "rewards_train/rejected": -1.6622413396835327, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -68.58075714111328, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -99.23036193847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4919242858886719, + "rewards_train/margins": 1.164960503578186, + "rewards_train/rejected": -0.6730362176895142, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -126.59989929199219, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -130.1810302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9599899649620056, + "rewards_train/margins": 0.058113038539886475, + "rewards_train/rejected": -1.018103003501892, + "step": 1986 + }, + { + "epoch": 0.56, + "logps_train/chosen": -9.811244010925293, + "logps_train/ref_chosen": -3.171875, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -43.8100471496582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6639369130134583, + "rewards_train/margins": 1.7295679450035095, + "rewards_train/rejected": -2.3935048580169678, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -32.500370025634766, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -90.25663757324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7375370264053345, + "rewards_train/margins": 0.9131268262863159, + "rewards_train/rejected": -2.6506638526916504, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.762714385986328, + "logps_train/ref_chosen": -7.875, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -52.17241287231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4887714385986328, + "rewards_train/margins": 2.609719753265381, + "rewards_train/rejected": -4.098491191864014, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -24.213123321533203, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -3.328125, + "logps_train/rejected": -34.128055572509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5838123559951782, + "rewards_train/margins": 1.4961806535720825, + "rewards_train/rejected": -3.0799930095672607, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -145.47064208984375, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -151.5188751220703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.897064208984375, + "rewards_train/margins": -0.2451765537261963, + "rewards_train/rejected": -3.6518876552581787, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -29.42605972290039, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -27.797645568847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1801060438156128, + "rewards_train/margins": 0.24340856075286865, + "rewards_train/rejected": -1.4235146045684814, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -13.227485656738281, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -19.90423011779785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4477485716342926, + "rewards_train/margins": 0.7051744163036346, + "rewards_train/rejected": -1.1529229879379272, + "step": 1987 + }, + { + "epoch": 0.56, + "logps_train/chosen": -16.113327026367188, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -23.345849990844727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8644577264785767, + "rewards_train/margins": 0.8701273202896118, + "rewards_train/rejected": -1.7345850467681885, + "step": 1987 + }, + { + "epoch": 0.56, + "learning_rate": 2.207080847501822e-07, + "loss": 0.4384, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -189.42140197753906, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -90.74588775634766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.442140102386475, + "rewards_train/margins": -3.542551279067993, + "rewards_train/rejected": -2.8995888233184814, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -77.4313735961914, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -175.65081787109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5431373715400696, + "rewards_train/margins": 7.0219447016716, + "rewards_train/rejected": -7.56508207321167, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.98828887939453, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -53.107330322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.005078911781311, + "rewards_train/margins": 0.7306541204452515, + "rewards_train/rejected": -1.7357330322265625, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -87.7871322631836, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -100.74853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7787132263183594, + "rewards_train/margins": 2.8461403846740723, + "rewards_train/rejected": -3.6248536109924316, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -107.12489318847656, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -222.7066650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06248931959271431, + "rewards_train/margins": 8.108177375048399, + "rewards_train/rejected": -8.170666694641113, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -112.48504638671875, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -87.74824523925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.048504590988159, + "rewards_train/margins": 1.076319932937622, + "rewards_train/rejected": -3.1248245239257812, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -45.87065505981445, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -52.826263427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.487065553665161, + "rewards_train/margins": 0.2955608367919922, + "rewards_train/rejected": -2.7826263904571533, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -201.9923095703125, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -177.12548828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.199230909347534, + "rewards_train/margins": -0.08668208122253418, + "rewards_train/rejected": -2.112548828125, + "step": 1988 + }, + { + "epoch": 0.56, + "logps_train/chosen": -6.110225677490234, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -23.788036346435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26727256178855896, + "rewards_train/margins": 0.5177811086177826, + "rewards_train/rejected": -0.7850536704063416, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -55.00677490234375, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -80.15779113769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.22432251274585724, + "rewards_train/margins": 0.04010161757469177, + "rewards_train/rejected": 0.18422089517116547, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -15.401083946228027, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -21.10318374633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3580771684646606, + "rewards_train/margins": 0.39911627769470215, + "rewards_train/rejected": -1.7571934461593628, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -0.8194763660430908, + "logps_train/ref_chosen": -0.37890625, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -3.818089008331299, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04405701160430908, + "rewards_train/margins": 0.05025189369916916, + "rewards_train/rejected": -0.09430890530347824, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -18.437213897705078, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -25.007247924804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3187214136123657, + "rewards_train/margins": 0.08825337886810303, + "rewards_train/rejected": -1.4069747924804688, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -100.23583984375, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -118.03084564208984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.423583984375, + "rewards_train/margins": 1.0295007228851318, + "rewards_train/rejected": -2.453084707260132, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -114.71401977539062, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -140.5517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9214019775390625, + "rewards_train/margins": 0.3837738037109375, + "rewards_train/rejected": -2.30517578125, + "step": 1989 + }, + { + "epoch": 0.56, + "logps_train/chosen": -17.4945068359375, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -37.99113464355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40570068359375, + "rewards_train/margins": 1.3684128522872925, + "rewards_train/rejected": -1.7741135358810425, + "step": 1989 + }, + { + "epoch": 0.56, + "learning_rate": 2.190529494984782e-07, + "loss": 0.6019, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -4.5765461921691895, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -18.298465728759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029529619961977005, + "rewards_train/margins": 0.6315669529139996, + "rewards_train/rejected": -0.6610965728759766, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -6.686708450317383, + "logps_train/ref_chosen": -1.5, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -26.141319274902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5186708569526672, + "rewards_train/margins": 1.6454610228538513, + "rewards_train/rejected": -2.1641318798065186, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -107.53163146972656, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -156.48062133789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.353163242340088, + "rewards_train/margins": 2.8448991775512695, + "rewards_train/rejected": -5.198062419891357, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -32.17255401611328, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -54.833370208740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7860053777694702, + "rewards_train/margins": 1.172331690788269, + "rewards_train/rejected": -2.9583370685577393, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -24.988632202148438, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -1.1328125, + "logps_train/rejected": -21.923669815063477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9894882440567017, + "rewards_train/margins": 0.08959758281707764, + "rewards_train/rejected": -2.0790858268737793, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -4.343075275421143, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -3.3125, + "logps_train/rejected": -18.9641170501709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04993252828717232, + "rewards_train/margins": 1.5152291767299175, + "rewards_train/rejected": -1.5651617050170898, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -72.93539428710938, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -73.03227233886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3435394763946533, + "rewards_train/margins": 0.00968790054321289, + "rewards_train/rejected": -3.353227376937866, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -174.64950561523438, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -169.9332733154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8649505972862244, + "rewards_train/margins": 0.12837672233581543, + "rewards_train/rejected": -0.9933273196220398, + "step": 1990 + }, + { + "epoch": 0.56, + "logps_train/chosen": -146.61012268066406, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -182.59085083007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6110122203826904, + "rewards_train/margins": 4.998072862625122, + "rewards_train/rejected": -8.609085083007812, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -64.42074584960938, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -90.65492248535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3920745849609375, + "rewards_train/margins": 3.5234177112579346, + "rewards_train/rejected": -3.915492296218872, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -23.80370330810547, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -51.932552337646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41787034273147583, + "rewards_train/margins": 1.3753848671913147, + "rewards_train/rejected": -1.7932552099227905, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -32.317665100097656, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -42.95292663574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5442665815353394, + "rewards_train/margins": 0.7135261297225952, + "rewards_train/rejected": -2.2577927112579346, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -66.53794860839844, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -123.03166961669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5037949085235596, + "rewards_train/margins": 2.9493720531463623, + "rewards_train/rejected": -4.453166961669922, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.35109519958496, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -23.248409271240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.735109567642212, + "rewards_train/margins": 0.1366063356399536, + "rewards_train/rejected": -1.8717159032821655, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -33.344459533691406, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -57.196800231933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49694594740867615, + "rewards_train/margins": 2.3852340281009674, + "rewards_train/rejected": -2.8821799755096436, + "step": 1991 + }, + { + "epoch": 0.56, + "logps_train/chosen": -26.053634643554688, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -59.52301025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6803635358810425, + "rewards_train/margins": 2.671937584877014, + "rewards_train/rejected": -4.352301120758057, + "step": 1991 + }, + { + "epoch": 0.56, + "learning_rate": 2.1740328004813568e-07, + "loss": 0.2868, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -47.41060256958008, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -37.157264709472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1535604000091553, + "rewards_train/margins": 0.877791166305542, + "rewards_train/rejected": -3.0313515663146973, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -0.14256975054740906, + "logps_train/ref_chosen": -0.55078125, + "logps_train/ref_rejected": -0.55078125, + "logps_train/rejected": -0.14382719993591309, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.040821149945259094, + "rewards_train/margins": 0.00012574344873428345, + "rewards_train/rejected": 0.04069540649652481, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -5.503096103668213, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -1.015625, + "logps_train/rejected": -4.93234920501709, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19874711334705353, + "rewards_train/margins": 0.1929253190755844, + "rewards_train/rejected": -0.39167243242263794, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -20.622047424316406, + "logps_train/ref_chosen": -8.0, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -26.80321502685547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2622047662734985, + "rewards_train/margins": -0.7318832278251648, + "rewards_train/rejected": -0.5303215384483337, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -307.5617370605469, + "logps_train/ref_chosen": -238.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -172.5597381591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.956173896789551, + "rewards_train/margins": -2.700200080871582, + "rewards_train/rejected": -4.255973815917969, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -106.33039855957031, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -164.1974334716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2830398082733154, + "rewards_train/margins": 2.736703634262085, + "rewards_train/rejected": -6.0197434425354, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -8.212824821472168, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -66.8749008178711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5181574821472168, + "rewards_train/margins": 1.66933274269104, + "rewards_train/rejected": -2.187490224838257, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -148.28945922851562, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -170.03494262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.328946113586426, + "rewards_train/margins": 2.9245481491088867, + "rewards_train/rejected": -7.2534942626953125, + "step": 1992 + }, + { + "epoch": 0.56, + "logps_train/chosen": -1.0492439270019531, + "logps_train/ref_chosen": -0.04638671875, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -8.282736778259277, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10028572380542755, + "rewards_train/margins": 0.3483004719018936, + "rewards_train/rejected": -0.44858619570732117, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -16.475318908691406, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -126.33685302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5912818908691406, + "rewards_train/margins": 3.29240345954895, + "rewards_train/rejected": -3.883685350418091, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -6.555206298828125, + "logps_train/ref_chosen": -1.6953125, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -36.144371032714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48598939180374146, + "rewards_train/margins": 1.3034477829933167, + "rewards_train/rejected": -1.789437174797058, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -93.83275604248047, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -161.58279418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5332756042480469, + "rewards_train/margins": 4.475003719329834, + "rewards_train/rejected": -6.008279323577881, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -119.09941864013672, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -120.955322265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3099420070648193, + "rewards_train/margins": -0.014409780502319336, + "rewards_train/rejected": -2.2955322265625, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -13.896909713745117, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -13.015636444091797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1303160190582275, + "rewards_train/margins": -0.24437737464904785, + "rewards_train/rejected": -0.8859386444091797, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.197978973388672, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -48.12211608886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.538547933101654, + "rewards_train/margins": 3.539288580417633, + "rewards_train/rejected": -4.077836513519287, + "step": 1993 + }, + { + "epoch": 0.56, + "logps_train/chosen": -2.469932794570923, + "logps_train/ref_chosen": -1.640625, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -16.240766525268555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0829307809472084, + "rewards_train/margins": 0.07239586859941483, + "rewards_train/rejected": -0.15532664954662323, + "step": 1993 + }, + { + "epoch": 0.56, + "learning_rate": 2.1575908794509212e-07, + "loss": 0.5533, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -214.64938354492188, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -219.08155822753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.31493854522705, + "rewards_train/margins": 1.1932172775268555, + "rewards_train/rejected": -10.508155822753906, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -31.23263168334961, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -29.375, + "logps_train/rejected": -50.077308654785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9982631802558899, + "rewards_train/margins": 1.0719677805900574, + "rewards_train/rejected": -2.0702309608459473, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -175.57289123535156, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -255.14300537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.957289218902588, + "rewards_train/margins": 5.557011127471924, + "rewards_train/rejected": -8.514300346374512, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -53.76201629638672, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -40.258094787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.4987983703613281, + "rewards_train/margins": 0.44960784912109375, + "rewards_train/rejected": 0.049190521240234375, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -0.8620858192443848, + "logps_train/ref_chosen": -0.263671875, + "logps_train/ref_rejected": -0.263671875, + "logps_train/rejected": -0.8660249710083008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05984139442443848, + "rewards_train/margins": 0.00039391592144966125, + "rewards_train/rejected": -0.06023531034588814, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -26.7921142578125, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -33.4896354675293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0542114973068237, + "rewards_train/margins": 0.33225202560424805, + "rewards_train/rejected": -1.3864635229110718, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -12.038866996765137, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -12.15341567993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37888669967651367, + "rewards_train/margins": 0.33332985639572144, + "rewards_train/rejected": -0.7122165560722351, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -56.90576171875, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -93.19233703613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.090576171875, + "rewards_train/margins": 0.32865750789642334, + "rewards_train/rejected": -1.4192336797714233, + "step": 1994 + }, + { + "epoch": 0.56, + "logps_train/chosen": -113.4468765258789, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -153.80494689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8446877002716064, + "rewards_train/margins": 3.535806894302368, + "rewards_train/rejected": -6.380494594573975, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.32366943359375, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -28.389102935791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.447991967201233, + "rewards_train/margins": 0.25341832637786865, + "rewards_train/rejected": -1.7014102935791016, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.619121551513672, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -59.397789001464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5869121551513672, + "rewards_train/margins": 1.7278668880462646, + "rewards_train/rejected": -2.314779043197632, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -47.865135192871094, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -216.68521118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7740135192871094, + "rewards_train/margins": 4.194507598876953, + "rewards_train/rejected": -5.9685211181640625, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -47.222557067871094, + "logps_train/ref_chosen": -26.125, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -87.11132049560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.109755754470825, + "rewards_train/margins": 2.901376485824585, + "rewards_train/rejected": -5.01113224029541, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -80.7967529296875, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -53.99674606323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.529675304889679, + "rewards_train/margins": 1.5449993014335632, + "rewards_train/rejected": -2.074674606323242, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -127.87120056152344, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -201.66754150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8371200561523438, + "rewards_train/margins": 5.52963399887085, + "rewards_train/rejected": -6.366754055023193, + "step": 1995 + }, + { + "epoch": 0.56, + "logps_train/chosen": -13.116823196411133, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -40.16791915893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7679323554039001, + "rewards_train/margins": 1.9488595128059387, + "rewards_train/rejected": -2.716791868209839, + "step": 1995 + }, + { + "epoch": 0.56, + "learning_rate": 2.1412038469694859e-07, + "loss": 0.2837, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -144.33193969726562, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -152.8617401123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.233193874359131, + "rewards_train/margins": 2.7029800415039062, + "rewards_train/rejected": -6.936173915863037, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -12.485268592834473, + "logps_train/ref_chosen": -0.4609375, + "logps_train/ref_rejected": -2.484375, + "logps_train/rejected": -18.82149887084961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2024331092834473, + "rewards_train/margins": 0.4312793016433716, + "rewards_train/rejected": -1.6337124109268188, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -56.48127746582031, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -113.46263122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.773127794265747, + "rewards_train/margins": 2.8231356143951416, + "rewards_train/rejected": -5.596263408660889, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -71.94065856933594, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -71.986572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2940658628940582, + "rewards_train/margins": 0.0045913755893707275, + "rewards_train/rejected": -0.29865723848342896, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -208.29827880859375, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -179.67385864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.329827785491943, + "rewards_train/margins": 0.4375581741333008, + "rewards_train/rejected": -4.767385959625244, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -39.026573181152344, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -29.625, + "logps_train/rejected": -48.62945556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7026573419570923, + "rewards_train/margins": 1.1977882385253906, + "rewards_train/rejected": -1.900445580482483, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -60.568359375, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -55.98274612426758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33183595538139343, + "rewards_train/margins": 3.50393870472908, + "rewards_train/rejected": -3.8357746601104736, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -30.689613342285156, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -87.08685302734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8439613580703735, + "rewards_train/margins": 4.3397239446640015, + "rewards_train/rejected": -5.183685302734375, + "step": 1996 + }, + { + "epoch": 0.56, + "logps_train/chosen": -103.45283508300781, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -85.89665985107422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6952836513519287, + "rewards_train/margins": -1.4056177139282227, + "rewards_train/rejected": -2.289665937423706, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -4.058367729187012, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -30.625, + "logps_train/rejected": -52.07658004760742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24416323006153107, + "rewards_train/margins": 2.389321282505989, + "rewards_train/rejected": -2.145158052444458, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -107.64337158203125, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -189.50799560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.614337205886841, + "rewards_train/margins": 4.686462163925171, + "rewards_train/rejected": -8.300799369812012, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -141.16366577148438, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -284.872314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.616366863250732, + "rewards_train/margins": 9.47086477279663, + "rewards_train/rejected": -14.087231636047363, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -163.35044860839844, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -217.0, + "logps_train/rejected": -306.07843017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7350448369979858, + "rewards_train/margins": 7.172798752784729, + "rewards_train/rejected": -8.907843589782715, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -26.84809112548828, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -47.968231201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4410591125488281, + "rewards_train/margins": 1.999514102935791, + "rewards_train/rejected": -3.440573215484619, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -20.735736846923828, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -73.59378814697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.32357367873191833, + "rewards_train/margins": 3.860805422067642, + "rewards_train/rejected": -4.1843791007995605, + "step": 1997 + }, + { + "epoch": 0.56, + "logps_train/chosen": -139.3275909423828, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -121.44032287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9827591180801392, + "rewards_train/margins": 2.561273455619812, + "rewards_train/rejected": -4.544032573699951, + "step": 1997 + }, + { + "epoch": 0.56, + "learning_rate": 2.1248718177289026e-07, + "loss": 0.254, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -34.05680847167969, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -39.57190704345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3744308948516846, + "rewards_train/margins": 1.1140098571777344, + "rewards_train/rejected": -3.488440752029419, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -141.5232696533203, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -186.4116973876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.3023271560668945, + "rewards_train/margins": 4.038843154907227, + "rewards_train/rejected": -9.341170310974121, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -39.98740005493164, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -48.27556610107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.386240005493164, + "rewards_train/margins": 0.8413166999816895, + "rewards_train/rejected": -3.2275567054748535, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -7.047144889831543, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -1.8515625, + "logps_train/rejected": -1.950600504875183, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.28283950686454773, + "rewards_train/margins": -0.2729357061907649, + "rewards_train/rejected": -0.009903800673782825, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -46.71818923950195, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -89.97715759277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44681891798973083, + "rewards_train/margins": 3.425896793603897, + "rewards_train/rejected": -3.872715711593628, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -25.236141204833984, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -47.038604736328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4611141681671143, + "rewards_train/margins": 1.867746353149414, + "rewards_train/rejected": -3.3288605213165283, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -159.38482666015625, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -243.4582977294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.588482618331909, + "rewards_train/margins": 5.757347345352173, + "rewards_train/rejected": -9.345829963684082, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -122.4900894165039, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -5.75, + "logps_train/rejected": -33.97087097167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4990090131759644, + "rewards_train/margins": 1.3230780363082886, + "rewards_train/rejected": -2.822087049484253, + "step": 1998 + }, + { + "epoch": 0.56, + "logps_train/chosen": -52.88808059692383, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -53.93727111816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4763081073760986, + "rewards_train/margins": 0.12991905212402344, + "rewards_train/rejected": -3.606227159500122, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -3.648162841796875, + "logps_train/ref_chosen": -2.328125, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -22.525171279907227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1320037841796875, + "rewards_train/margins": -0.041986651718616486, + "rewards_train/rejected": -0.09001713246107101, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -9.180160522460938, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -1.5, + "logps_train/rejected": -35.53208923339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.158641055226326, + "rewards_train/margins": 3.2445679157972336, + "rewards_train/rejected": -3.4032089710235596, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -21.55772590637207, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -79.4703369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5807725787162781, + "rewards_train/margins": 4.203761398792267, + "rewards_train/rejected": -4.784533977508545, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -82.84353637695312, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -129.25836181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16564635932445526, + "rewards_train/margins": 1.0414825528860092, + "rewards_train/rejected": -0.875836193561554, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -24.035341262817383, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -14.072341918945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0535341501235962, + "rewards_train/margins": 0.06620001792907715, + "rewards_train/rejected": -1.1197341680526733, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -32.85015106201172, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -35.418922424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.810015082359314, + "rewards_train/margins": 0.7631272077560425, + "rewards_train/rejected": -2.5731422901153564, + "step": 1999 + }, + { + "epoch": 0.56, + "logps_train/chosen": -30.135955810546875, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -31.447769165039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6573456525802612, + "rewards_train/margins": 0.7093063592910767, + "rewards_train/rejected": -2.366652011871338, + "step": 1999 + }, + { + "epoch": 0.56, + "learning_rate": 2.1085949060360653e-07, + "loss": 0.3156, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -95.66426849365234, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -123.47234344482422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9164268374443054, + "rewards_train/margins": -0.16919249296188354, + "rewards_train/rejected": -0.7472343444824219, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -45.833621978759766, + "logps_train/ref_chosen": -41.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -79.21464538574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43336221575737, + "rewards_train/margins": 1.4381023943424225, + "rewards_train/rejected": -1.8714646100997925, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -10.414764404296875, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -1.3203125, + "logps_train/rejected": -18.250017166137695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6335235834121704, + "rewards_train/margins": 2.3264940977096558, + "rewards_train/rejected": -1.6929705142974854, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -24.5801944732666, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -32.05476379394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3705195188522339, + "rewards_train/margins": 0.6099568605422974, + "rewards_train/rejected": -1.9804763793945312, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -29.729904174804688, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -33.14858627319336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4542404413223267, + "rewards_train/margins": 0.8106182813644409, + "rewards_train/rejected": -2.2648587226867676, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -115.59747314453125, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -146.91485595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0597474575042725, + "rewards_train/margins": 0.7317380905151367, + "rewards_train/rejected": -3.791485548019409, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -29.467525482177734, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -41.736045837402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4123775959014893, + "rewards_train/margins": 1.0299770832061768, + "rewards_train/rejected": -3.442354679107666, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -228.33372497558594, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -164.47708129882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.433372497558594, + "rewards_train/margins": -3.135664463043213, + "rewards_train/rejected": -5.297708034515381, + "step": 2000 + }, + { + "epoch": 0.56, + "logps_train/chosen": -15.815346717834473, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -56.52143859863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0940346717834473, + "rewards_train/margins": 3.0268592834472656, + "rewards_train/rejected": -4.120893955230713, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.14202880859375, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -36.24079895019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.098297119140625, + "rewards_train/margins": 2.0473769903182983, + "rewards_train/rejected": -1.9490798711776733, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -186.94581604003906, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -192.77520751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6945816278457642, + "rewards_train/margins": 3.3829392194747925, + "rewards_train/rejected": -4.077520847320557, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -244.44140625, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -236.7587127685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.444140434265137, + "rewards_train/margins": 0.03173065185546875, + "rewards_train/rejected": -9.475871086120605, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -40.338314056396484, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -17.049602508544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9588314294815063, + "rewards_train/margins": -1.2413711547851562, + "rewards_train/rejected": -0.7174602746963501, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.31414794921875, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -21.92186737060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7376648187637329, + "rewards_train/margins": 0.945146918296814, + "rewards_train/rejected": -1.6828117370605469, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -117.30802917480469, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -147.7230987548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8808029294013977, + "rewards_train/margins": 3.7915069460868835, + "rewards_train/rejected": -4.672309875488281, + "step": 2001 + }, + { + "epoch": 0.56, + "logps_train/chosen": -39.45121765136719, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -32.90896224975586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3763718605041504, + "rewards_train/margins": -0.347975492477417, + "rewards_train/rejected": -2.0283963680267334, + "step": 2001 + }, + { + "epoch": 0.56, + "learning_rate": 2.092373225812092e-07, + "loss": 0.5858, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -78.1277084350586, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -209.33871459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4377708435058594, + "rewards_train/margins": 7.446101188659668, + "rewards_train/rejected": -8.883872032165527, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -179.28399658203125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -234.4915771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.628399610519409, + "rewards_train/margins": 4.920758485794067, + "rewards_train/rejected": -8.549158096313477, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -2.7951502799987793, + "logps_train/ref_chosen": -1.7265625, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -11.252410888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10685878247022629, + "rewards_train/margins": 0.06838231533765793, + "rewards_train/rejected": -0.17524109780788422, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -132.96646118164062, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -173.73464965820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8466460704803467, + "rewards_train/margins": 3.0268189907073975, + "rewards_train/rejected": -6.873465061187744, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -100.10997772216797, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -97.49404907226562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.260997772216797, + "rewards_train/margins": -0.011592864990234375, + "rewards_train/rejected": -2.2494049072265625, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -32.53097152709961, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -72.54676818847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.871847152709961, + "rewards_train/margins": 1.5078296661376953, + "rewards_train/rejected": -3.3796768188476562, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.075531005859375, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -28.470739364624023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4106781482696533, + "rewards_train/margins": 0.7957708835601807, + "rewards_train/rejected": -2.206449031829834, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -20.34882354736328, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -50.17991638183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.709882378578186, + "rewards_train/margins": 3.6706093549728394, + "rewards_train/rejected": -4.380491733551025, + "step": 2002 + }, + { + "epoch": 0.56, + "logps_train/chosen": -131.88079833984375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -202.36178588867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.588079810142517, + "rewards_train/margins": 6.998098969459534, + "rewards_train/rejected": -8.58617877960205, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -128.43585205078125, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -235.0206298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.593585193157196, + "rewards_train/margins": 10.65847760438919, + "rewards_train/rejected": -11.252062797546387, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -10.837207794189453, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -29.044456481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3118457794189453, + "rewards_train/margins": 1.7863500118255615, + "rewards_train/rejected": -2.098195791244507, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -4.982952117919922, + "logps_train/ref_chosen": -3.640625, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -31.35952377319336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13423271477222443, + "rewards_train/margins": 1.6392196863889694, + "rewards_train/rejected": -1.7734524011611938, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -209.13516235351562, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -238.89236450195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.7135162353515625, + "rewards_train/margins": 1.9757204055786133, + "rewards_train/rejected": -9.689236640930176, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -21.245559692382812, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -16.672582626342773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.212055966258049, + "rewards_train/margins": 1.0302023440599442, + "rewards_train/rejected": -1.2422583103179932, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -70.27039337158203, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -56.274627685546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4520394802093506, + "rewards_train/margins": -0.8995766639709473, + "rewards_train/rejected": -1.5524628162384033, + "step": 2003 + }, + { + "epoch": 0.56, + "logps_train/chosen": -91.94444274902344, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -189.84661865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.394444227218628, + "rewards_train/margins": 6.240217447280884, + "rewards_train/rejected": -8.634661674499512, + "step": 2003 + }, + { + "epoch": 0.56, + "learning_rate": 2.076206890591552e-07, + "loss": 0.2515, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -8.55280590057373, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -27.062602996826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6521555781364441, + "rewards_train/margins": 1.4103546738624573, + "rewards_train/rejected": -2.0625102519989014, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -149.5128173828125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -186.1239776611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.701281785964966, + "rewards_train/margins": 0.9111158847808838, + "rewards_train/rejected": -4.61239767074585, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -104.10868072509766, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -132.4463653564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7608680725097656, + "rewards_train/margins": 1.333768606185913, + "rewards_train/rejected": -3.0946366786956787, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -183.55612182617188, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -186.108154296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.4556121826171875, + "rewards_train/margins": -1.64479660987854, + "rewards_train/rejected": -3.8108155727386475, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -90.01626586914062, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -55.46288299560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5516265630722046, + "rewards_train/margins": -0.6553382277488708, + "rewards_train/rejected": -0.8962883353233337, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -74.76903533935547, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -129.66390991210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3019036054611206, + "rewards_train/margins": 0.8144873380661011, + "rewards_train/rejected": -2.1163909435272217, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -37.92233657836914, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -36.261775970458984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3453586101531982, + "rewards_train/margins": -0.13793087005615234, + "rewards_train/rejected": -3.207427740097046, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -113.64302062988281, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -100.19415283203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.114302158355713, + "rewards_train/margins": -0.19488680362701416, + "rewards_train/rejected": -1.9194153547286987, + "step": 2004 + }, + { + "epoch": 0.56, + "logps_train/chosen": -32.5525016784668, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -26.974769592285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1302502155303955, + "rewards_train/margins": -0.7577732801437378, + "rewards_train/rejected": -1.3724769353866577, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -20.417434692382812, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -38.010765075683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21674346923828125, + "rewards_train/margins": 2.5968329906463623, + "rewards_train/rejected": -2.8135764598846436, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -80.87550354003906, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -47.73362731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6375503540039062, + "rewards_train/margins": 2.435812473297119, + "rewards_train/rejected": -3.0733628273010254, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -37.604698181152344, + "logps_train/ref_chosen": -27.0, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -40.32334899902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0604698657989502, + "rewards_train/margins": 2.456240177154541, + "rewards_train/rejected": -3.516710042953491, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -134.48663330078125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -174.4423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1986634731292725, + "rewards_train/margins": 0.745574951171875, + "rewards_train/rejected": -2.9442384243011475, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -27.661102294921875, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -28.273040771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1848602294921875, + "rewards_train/margins": 1.0111939907073975, + "rewards_train/rejected": -2.196054220199585, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -121.5830307006836, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -143.4943084716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.658303141593933, + "rewards_train/margins": 4.391127705574036, + "rewards_train/rejected": -6.049430847167969, + "step": 2005 + }, + { + "epoch": 0.56, + "logps_train/chosen": -10.686042785644531, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -33.89757537841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1686042845249176, + "rewards_train/margins": 2.4149032533168793, + "rewards_train/rejected": -2.583507537841797, + "step": 2005 + }, + { + "epoch": 0.56, + "learning_rate": 2.060096013521646e-07, + "loss": 0.4867, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -114.79643249511719, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -90.34555053710938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5296432971954346, + "rewards_train/margins": -0.9950881004333496, + "rewards_train/rejected": -2.534555196762085, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -102.62958526611328, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -105.5189208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1629585027694702, + "rewards_train/margins": 1.7389336824417114, + "rewards_train/rejected": -2.9018921852111816, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -126.48667907714844, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -196.75729370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6986678838729858, + "rewards_train/margins": 7.477061867713928, + "rewards_train/rejected": -9.175729751586914, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -100.80218505859375, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -159.6453094482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.380218505859375, + "rewards_train/margins": 2.5843124389648438, + "rewards_train/rejected": -5.964530944824219, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -166.46514892578125, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -196.10122680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.696515083312988, + "rewards_train/margins": 4.7136077880859375, + "rewards_train/rejected": -9.410122871398926, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -53.203582763671875, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -54.15161895751953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.595358371734619, + "rewards_train/margins": 0.6510534286499023, + "rewards_train/rejected": -4.2464118003845215, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -58.7761344909668, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -43.693233489990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7776135206222534, + "rewards_train/margins": 1.3854597806930542, + "rewards_train/rejected": -3.1630733013153076, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -93.87992858886719, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -109.42662048339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7879928350448608, + "rewards_train/margins": -1.145330786705017, + "rewards_train/rejected": -0.6426620483398438, + "step": 2006 + }, + { + "epoch": 0.56, + "logps_train/chosen": -33.49303436279297, + "logps_train/ref_chosen": -22.0, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -37.926429748535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1493034362792969, + "rewards_train/margins": 1.1933395862579346, + "rewards_train/rejected": -2.3426430225372314, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -169.64797973632812, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -267.32647705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9647979736328125, + "rewards_train/margins": 7.067850112915039, + "rewards_train/rejected": -10.032648086547852, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -143.17111206054688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -239.2964324951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2171112298965454, + "rewards_train/margins": 7.412532210350037, + "rewards_train/rejected": -8.629643440246582, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -11.677787780761719, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -36.99417495727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8052787780761719, + "rewards_train/margins": 2.2628886699676514, + "rewards_train/rejected": -3.0681674480438232, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -94.30973052978516, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -153.68240356445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3809731006622314, + "rewards_train/margins": 3.1372673511505127, + "rewards_train/rejected": -5.518240451812744, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -26.523935317993164, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -37.720577239990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6836435794830322, + "rewards_train/margins": 1.6759142875671387, + "rewards_train/rejected": -3.359557867050171, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -176.55470275878906, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -176.8070068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5554704666137695, + "rewards_train/margins": 0.02523040771484375, + "rewards_train/rejected": -4.580700874328613, + "step": 2007 + }, + { + "epoch": 0.56, + "logps_train/chosen": -0.04576569050550461, + "logps_train/ref_chosen": -0.072265625, + "logps_train/ref_rejected": -0.072265625, + "logps_train/rejected": -0.04074740782380104, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0026499934028834105, + "rewards_train/margins": -0.0005018282681703568, + "rewards_train/rejected": 0.003151821671053767, + "step": 2007 + }, + { + "epoch": 0.56, + "learning_rate": 2.0440407073614363e-07, + "loss": 0.3481, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -35.250545501708984, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -30.164579391479492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0688045024871826, + "rewards_train/margins": -0.18359649181365967, + "rewards_train/rejected": -1.885208010673523, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -87.83340454101562, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -189.61495971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4833405017852783, + "rewards_train/margins": 6.6781556606292725, + "rewards_train/rejected": -8.16149616241455, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -11.611948013305664, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -28.901840209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8955698013305664, + "rewards_train/margins": 1.1258642673492432, + "rewards_train/rejected": -2.0214340686798096, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -129.38575744628906, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -186.84007263183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7385756969451904, + "rewards_train/margins": 5.595431566238403, + "rewards_train/rejected": -8.334007263183594, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -132.19346618652344, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -48.50755310058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2193466424942017, + "rewards_train/margins": 1.1439086198806763, + "rewards_train/rejected": -2.363255262374878, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -84.83372497558594, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -160.3286590576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8333725333213806, + "rewards_train/margins": 4.749493658542633, + "rewards_train/rejected": -5.582866191864014, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -26.185041427612305, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -45.37678527832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.493504136800766, + "rewards_train/margins": 1.9691745340824127, + "rewards_train/rejected": -2.4626786708831787, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -135.73056030273438, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -187.45220947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3230559825897217, + "rewards_train/margins": 5.922165155410767, + "rewards_train/rejected": -8.245221138000488, + "step": 2008 + }, + { + "epoch": 0.56, + "logps_train/chosen": -68.48532104492188, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -46.73692321777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7485321760177612, + "rewards_train/margins": 2.3407851457595825, + "rewards_train/rejected": -4.089317321777344, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -102.62493133544922, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -159.88482666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7624931335449219, + "rewards_train/margins": 2.275989532470703, + "rewards_train/rejected": -4.038482666015625, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -13.260509490966797, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -11.411890029907227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16769905388355255, + "rewards_train/margins": 1.0104505568742752, + "rewards_train/rejected": -0.8427515029907227, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -60.132286071777344, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -39.75, + "logps_train/rejected": -59.25556564331055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2132285833358765, + "rewards_train/margins": 0.737328052520752, + "rewards_train/rejected": -1.9505566358566284, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -77.9609375, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -51.24774169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8460937738418579, + "rewards_train/margins": 1.053680419921875, + "rewards_train/rejected": -1.899774193763733, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -45.20816421508789, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -54.52521514892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8708164691925049, + "rewards_train/margins": 2.187955141067505, + "rewards_train/rejected": -4.05877161026001, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -83.53005981445312, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -141.37673950195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.653006076812744, + "rewards_train/margins": 2.3846678733825684, + "rewards_train/rejected": -5.0376739501953125, + "step": 2009 + }, + { + "epoch": 0.56, + "logps_train/chosen": -91.0226058959961, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -75.7138900756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3522605895996094, + "rewards_train/margins": 0.24412846565246582, + "rewards_train/rejected": -1.5963890552520752, + "step": 2009 + }, + { + "epoch": 0.56, + "learning_rate": 2.0280410844810424e-07, + "loss": 0.2162, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -133.7678680419922, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -167.425048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7767868041992188, + "rewards_train/margins": 2.615718364715576, + "rewards_train/rejected": -6.392505168914795, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.62077522277832, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -42.34153366088867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5245774984359741, + "rewards_train/margins": 0.7220758199691772, + "rewards_train/rejected": -2.2466533184051514, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -87.6112060546875, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -159.06678771972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.561120629310608, + "rewards_train/margins": 1.2455581426620483, + "rewards_train/rejected": -2.8066787719726562, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -276.0699462890625, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -267.1163024902344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.706995010375977, + "rewards_train/margins": -1.2953643798828125, + "rewards_train/rejected": -8.411630630493164, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -10.165889739990234, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -28.199562072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4915889799594879, + "rewards_train/margins": 0.6783672273159027, + "rewards_train/rejected": -1.1699562072753906, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -220.23931884765625, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -245.056884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.623931884765625, + "rewards_train/margins": 2.3817567825317383, + "rewards_train/rejected": -9.005688667297363, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -198.17880249023438, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -229.8751678466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.817880153656006, + "rewards_train/margins": 3.9696364402770996, + "rewards_train/rejected": -8.787516593933105, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -131.1782684326172, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -138.6951904296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.317826986312866, + "rewards_train/margins": 0.801692008972168, + "rewards_train/rejected": -3.119518995285034, + "step": 2010 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.52793312072754, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -1.5703125, + "logps_train/rejected": -23.16242790222168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.827793300151825, + "rewards_train/margins": 1.3314183354377747, + "rewards_train/rejected": -2.1592116355895996, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -75.83738708496094, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -77.05780029296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4837387204170227, + "rewards_train/margins": -0.12795868515968323, + "rewards_train/rejected": -0.3557800352573395, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -153.9542694091797, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -183.385986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0954270362854004, + "rewards_train/margins": 1.8431715965270996, + "rewards_train/rejected": -3.9385986328125, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -25.91175651550293, + "logps_train/ref_chosen": -15.6875, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -56.84193420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.022425651550293, + "rewards_train/margins": 2.511767864227295, + "rewards_train/rejected": -3.534193515777588, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -6.352908611297607, + "logps_train/ref_chosen": -0.6640625, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -9.031579971313477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5688846111297607, + "rewards_train/margins": 0.09833592176437378, + "rewards_train/rejected": -0.6672205328941345, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -17.510528564453125, + "logps_train/ref_chosen": -0.1982421875, + "logps_train/ref_rejected": -0.1982421875, + "logps_train/rejected": -17.384765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7312287092208862, + "rewards_train/margins": -0.01257634162902832, + "rewards_train/rejected": -1.718652367591858, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -84.22334289550781, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -88.40324401855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5723342895507812, + "rewards_train/margins": 0.0179901123046875, + "rewards_train/rejected": -1.5903244018554688, + "step": 2011 + }, + { + "epoch": 0.56, + "logps_train/chosen": -113.76937866210938, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -151.9627227783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6769378185272217, + "rewards_train/margins": 4.36933445930481, + "rewards_train/rejected": -7.046272277832031, + "step": 2011 + }, + { + "epoch": 0.56, + "learning_rate": 2.0120972568608607e-07, + "loss": 0.4003, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -107.84595489501953, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -134.69786071777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7845954895019531, + "rewards_train/margins": 1.0851906538009644, + "rewards_train/rejected": -1.8697861433029175, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -34.276058197021484, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -29.522666931152344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.811980962753296, + "rewards_train/margins": -0.3003392219543457, + "rewards_train/rejected": -2.51164174079895, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -10.594099998474121, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -67.39298248291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44378501176834106, + "rewards_train/margins": 3.2205132842063904, + "rewards_train/rejected": -3.6642982959747314, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -93.96525573730469, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -258.0, + "logps_train/rejected": -292.7566223144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19652557373046875, + "rewards_train/margins": 3.2791366577148438, + "rewards_train/rejected": -3.4756622314453125, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -172.744873046875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -123.84629821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.674487590789795, + "rewards_train/margins": 0.36014223098754883, + "rewards_train/rejected": -5.034629821777344, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -104.45695495605469, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -137.86932373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2956955432891846, + "rewards_train/margins": 1.9412367343902588, + "rewards_train/rejected": -4.236932277679443, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -63.18646240234375, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -120.71450805664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0686463117599487, + "rewards_train/margins": 4.652804493904114, + "rewards_train/rejected": -5.7214508056640625, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -176.3492431640625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -189.7744140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.084924221038818, + "rewards_train/margins": 1.2925171852111816, + "rewards_train/rejected": -7.37744140625, + "step": 2012 + }, + { + "epoch": 0.56, + "logps_train/chosen": -29.771650314331055, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -20.167348861694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3834151029586792, + "rewards_train/margins": 0.3473823070526123, + "rewards_train/rejected": -1.7307974100112915, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -107.0062026977539, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -187.14974975585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8006203174591064, + "rewards_train/margins": 2.9143545627593994, + "rewards_train/rejected": -4.714974880218506, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -152.3831329345703, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -68.51620483398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4883134365081787, + "rewards_train/margins": 0.48830699920654297, + "rewards_train/rejected": -2.9766204357147217, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -23.43277931213379, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -45.67642593383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6901530027389526, + "rewards_train/margins": 0.43998968601226807, + "rewards_train/rejected": -2.1301426887512207, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -24.224998474121094, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -14.50859546661377, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8881248235702515, + "rewards_train/margins": -0.7435152530670166, + "rewards_train/rejected": -1.1446095705032349, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -19.377483367919922, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -7.34375, + "logps_train/rejected": -39.85310745239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7564983367919922, + "rewards_train/margins": 2.4944374561309814, + "rewards_train/rejected": -3.2509357929229736, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -120.72598266601562, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -184.37417602539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5225982666015625, + "rewards_train/margins": 1.3148193359375, + "rewards_train/rejected": -3.8374176025390625, + "step": 2013 + }, + { + "epoch": 0.56, + "logps_train/chosen": -44.74939727783203, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -20.326417922973633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3436896800994873, + "rewards_train/margins": -1.623547911643982, + "rewards_train/rejected": -1.7201417684555054, + "step": 2013 + }, + { + "epoch": 0.56, + "learning_rate": 1.9962093360907773e-07, + "loss": 0.4341, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -58.739688873291016, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -83.34403991699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2114689350128174, + "rewards_train/margins": 2.3854353427886963, + "rewards_train/rejected": -5.596904277801514, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -23.732105255126953, + "logps_train/ref_chosen": -1.46875, + "logps_train/ref_rejected": -3.328125, + "logps_train/rejected": -38.94170379638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2263355255126953, + "rewards_train/margins": 1.3350224494934082, + "rewards_train/rejected": -3.5613579750061035, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -89.53909301757812, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -168.61471557617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3539092540740967, + "rewards_train/margins": 5.407562494277954, + "rewards_train/rejected": -7.761471748352051, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -45.259246826171875, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -65.99440002441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.175924777984619, + "rewards_train/margins": 0.7235152721405029, + "rewards_train/rejected": -3.899440050125122, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.7731876373291, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -57.951698303222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.721068799495697, + "rewards_train/margins": 2.4741011261940002, + "rewards_train/rejected": -3.1951699256896973, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -112.47584533691406, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -143.63385009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.247584581375122, + "rewards_train/margins": 1.6158006191253662, + "rewards_train/rejected": -4.863385200500488, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -82.02879333496094, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -67.7379150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4278793334960938, + "rewards_train/margins": 0.9709124565124512, + "rewards_train/rejected": -4.398791790008545, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -133.81491088867188, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -156.9119873046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.381491184234619, + "rewards_train/margins": -0.04029250144958496, + "rewards_train/rejected": -3.341198682785034, + "step": 2014 + }, + { + "epoch": 0.56, + "logps_train/chosen": -37.25999450683594, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -41.21712112426758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8447494506835938, + "rewards_train/margins": 0.8675878047943115, + "rewards_train/rejected": -3.7123372554779053, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -187.98757934570312, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -198.97784423828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.3987579345703125, + "rewards_train/margins": -0.10097360610961914, + "rewards_train/rejected": -5.297784328460693, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -7.5254411697387695, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -11.127239227294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4619191288948059, + "rewards_train/margins": 0.03517979383468628, + "rewards_train/rejected": -0.4970989227294922, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -176.91506958007812, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -119.31327819824219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.991507053375244, + "rewards_train/margins": -0.06017923355102539, + "rewards_train/rejected": -2.9313278198242188, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -55.03138732910156, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -66.03897094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0531387329101562, + "rewards_train/margins": 2.263258457183838, + "rewards_train/rejected": -4.316397190093994, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -53.906517028808594, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -129.93003845214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9156517386436462, + "rewards_train/margins": 2.477352201938629, + "rewards_train/rejected": -3.3930039405822754, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.63964080810547, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -54.94868087768555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7577141523361206, + "rewards_train/margins": 1.2121540307998657, + "rewards_train/rejected": -2.9698681831359863, + "step": 2015 + }, + { + "epoch": 0.56, + "logps_train/chosen": -12.323541641235352, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -1.859375, + "logps_train/rejected": -13.501239776611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45110416412353516, + "rewards_train/margins": 0.7130823135375977, + "rewards_train/rejected": -1.1641864776611328, + "step": 2015 + }, + { + "epoch": 0.56, + "learning_rate": 1.9803774333693945e-07, + "loss": 0.3346, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -27.332983016967773, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -33.041927337646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.227048397064209, + "rewards_train/margins": 0.020894289016723633, + "rewards_train/rejected": -2.2479426860809326, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -122.30664825439453, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -203.24908447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4306648969650269, + "rewards_train/margins": 4.894243836402893, + "rewards_train/rejected": -6.32490873336792, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -148.30191040039062, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -144.0513458251953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1801910400390625, + "rewards_train/margins": 0.4749436378479004, + "rewards_train/rejected": -3.655134677886963, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -22.17337989807129, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -27.044193267822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.887650489807129, + "rewards_train/margins": 0.24801898002624512, + "rewards_train/rejected": -2.135669469833374, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -126.71759796142578, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -92.11897277832031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.571759819984436, + "rewards_train/margins": -0.8098625540733337, + "rewards_train/rejected": -0.7618972659111023, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -155.8280029296875, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -198.84579467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.032800197601318, + "rewards_train/margins": 3.501779556274414, + "rewards_train/rejected": -7.534579753875732, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -92.18558502197266, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -125.15516662597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.118558645248413, + "rewards_train/margins": 1.3969581127166748, + "rewards_train/rejected": -3.515516757965088, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -35.09983825683594, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -35.29775619506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8006088733673096, + "rewards_train/margins": 0.3432292938232422, + "rewards_train/rejected": -3.1438381671905518, + "step": 2016 + }, + { + "epoch": 0.56, + "logps_train/chosen": -12.59170150756836, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -20.922039031982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8372951745986938, + "rewards_train/margins": 0.8486588001251221, + "rewards_train/rejected": -1.685953974723816, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -44.52241516113281, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -68.17513275146484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.927241563796997, + "rewards_train/margins": -1.1847282648086548, + "rewards_train/rejected": -1.7425132989883423, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -104.16511535644531, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -221.10568237304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.566511631011963, + "rewards_train/margins": 3.2440567016601562, + "rewards_train/rejected": -5.810568332672119, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -79.14666748046875, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -52.03684616088867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4646667540073395, + "rewards_train/margins": 2.114017814397812, + "rewards_train/rejected": -2.5786845684051514, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -54.54181671142578, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -68.12088012695312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.079181671142578, + "rewards_train/margins": -1.842093586921692, + "rewards_train/rejected": -1.2370880842208862, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -98.35184478759766, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -164.2433319091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.035184621810913, + "rewards_train/margins": 5.989148378372192, + "rewards_train/rejected": -9.024333000183105, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -110.82638549804688, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -104.96226501464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9826385974884033, + "rewards_train/margins": -0.8364119529724121, + "rewards_train/rejected": -2.146226644515991, + "step": 2017 + }, + { + "epoch": 0.56, + "logps_train/chosen": -105.02100372314453, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -84.05330657958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.002100467681885, + "rewards_train/margins": 1.2157301902770996, + "rewards_train/rejected": -5.217830657958984, + "step": 2017 + }, + { + "epoch": 0.56, + "learning_rate": 1.9646016595032487e-07, + "loss": 0.5704, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -6.532332897186279, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -36.39851760864258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39385828375816345, + "rewards_train/margins": 2.414743572473526, + "rewards_train/rejected": -2.8086018562316895, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -24.72568702697754, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -59.149253845214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.422568678855896, + "rewards_train/margins": 2.8173567056655884, + "rewards_train/rejected": -4.239925384521484, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -166.82562255859375, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -277.26416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.082562446594238, + "rewards_train/margins": 6.643854141235352, + "rewards_train/rejected": -10.72641658782959, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -88.00349426269531, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -145.28927612304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.800349473953247, + "rewards_train/margins": 5.078578233718872, + "rewards_train/rejected": -7.878927707672119, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -176.2328643798828, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -189.25213623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.823286533355713, + "rewards_train/margins": 5.501926898956299, + "rewards_train/rejected": -11.325213432312012, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -103.55441284179688, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -224.04087829589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.505441427230835, + "rewards_train/margins": 8.348646402359009, + "rewards_train/rejected": -10.854087829589844, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -192.28311157226562, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -224.8303985595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.228311061859131, + "rewards_train/margins": 4.854729175567627, + "rewards_train/rejected": -10.083040237426758, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -12.043583869934082, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -13.9375, + "logps_train/rejected": -14.699190139770508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9981083869934082, + "rewards_train/margins": -0.9219393730163574, + "rewards_train/rejected": -0.07616901397705078, + "step": 2018 + }, + { + "epoch": 0.56, + "logps_train/chosen": -171.36465454101562, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -180.59567260742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4864654541015625, + "rewards_train/margins": 2.5731019973754883, + "rewards_train/rejected": -7.059567451477051, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -42.17063522338867, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -37.55450439453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.642063617706299, + "rewards_train/margins": -0.824113130569458, + "rewards_train/rejected": -1.8179504871368408, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -160.85464477539062, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -213.1699981689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.93546462059021, + "rewards_train/margins": 6.331535577774048, + "rewards_train/rejected": -10.267000198364258, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -248.38307189941406, + "logps_train/ref_chosen": -192.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -217.91014099121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.638307094573975, + "rewards_train/margins": -0.3472929000854492, + "rewards_train/rejected": -5.291014194488525, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -38.05434036254883, + "logps_train/ref_chosen": -30.875, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -60.04363250732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7179340720176697, + "rewards_train/margins": 0.1864292025566101, + "rewards_train/rejected": -0.9043632745742798, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -108.15777587890625, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -130.35617065429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.665777564048767, + "rewards_train/margins": 1.2198394536972046, + "rewards_train/rejected": -2.8856170177459717, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -198.96450805664062, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -187.42393493652344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.196450710296631, + "rewards_train/margins": -0.2540569305419922, + "rewards_train/rejected": -4.942393779754639, + "step": 2019 + }, + { + "epoch": 0.56, + "logps_train/chosen": -111.34785461425781, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -110.29043579101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6347854733467102, + "rewards_train/margins": -0.20574188232421875, + "rewards_train/rejected": -0.42904359102249146, + "step": 2019 + }, + { + "epoch": 0.56, + "learning_rate": 1.9488821249060293e-07, + "loss": 0.3786, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -47.26948928833008, + "logps_train/ref_chosen": -36.0, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -29.058441162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1269489526748657, + "rewards_train/margins": 0.3788951635360718, + "rewards_train/rejected": -1.5058441162109375, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -242.15496826171875, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -229.27056884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.615496635437012, + "rewards_train/margins": 0.011560440063476562, + "rewards_train/rejected": -8.627057075500488, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -63.857521057128906, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -63.48884963989258, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21075211465358734, + "rewards_train/margins": -0.03686714172363281, + "rewards_train/rejected": -0.17388497292995453, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -217.42245483398438, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -173.3115997314453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.742245674133301, + "rewards_train/margins": -0.7110857963562012, + "rewards_train/rejected": -4.0311598777771, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -123.97904968261719, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -159.70477294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.147905111312866, + "rewards_train/margins": 3.922572374343872, + "rewards_train/rejected": -6.070477485656738, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -13.4862060546875, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -12.008309364318848, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.854870617389679, + "rewards_train/margins": 0.03189784288406372, + "rewards_train/rejected": -0.8867684602737427, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -138.74014282226562, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -163.69381713867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.374014377593994, + "rewards_train/margins": 0.3953673839569092, + "rewards_train/rejected": -3.7693817615509033, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -0.00037834796239621937, + "logps_train/ref_chosen": -0.0074462890625, + "logps_train/ref_rejected": -0.0074462890625, + "logps_train/rejected": -0.00040230643935501575, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0007067941478453577, + "rewards_train/margins": 2.3958273231983185e-06, + "rewards_train/rejected": 0.0007043983205221593, + "step": 2020 + }, + { + "epoch": 0.56, + "logps_train/chosen": -20.850811004638672, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -12.541410446166992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5225811004638672, + "rewards_train/margins": -0.582502543926239, + "rewards_train/rejected": -0.9400785565376282, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -117.01409912109375, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -152.0133056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0014100074768066, + "rewards_train/margins": 3.449920654296875, + "rewards_train/rejected": -5.451330661773682, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -65.42527770996094, + "logps_train/ref_chosen": -50.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -146.00021362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5425277948379517, + "rewards_train/margins": 3.357493758201599, + "rewards_train/rejected": -4.900021553039551, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -28.984825134277344, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -51.806373596191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8609825372695923, + "rewards_train/margins": 3.3946551084518433, + "rewards_train/rejected": -4.2556376457214355, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -73.01673889160156, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -181.73092651367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.35167396068573, + "rewards_train/margins": 8.07141888141632, + "rewards_train/rejected": -9.42309284210205, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -37.435665130615234, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -22.591135025024414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.206066608428955, + "rewards_train/margins": -0.2860156297683716, + "rewards_train/rejected": -1.9200509786605835, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -3.76461124420166, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -1.171875, + "logps_train/rejected": -5.7794060707092285, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.039163876324892044, + "rewards_train/margins": 0.4999169893562794, + "rewards_train/rejected": -0.46075311303138733, + "step": 2021 + }, + { + "epoch": 0.56, + "logps_train/chosen": -116.13763427734375, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -167.49307250976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.813763380050659, + "rewards_train/margins": 4.035544157028198, + "rewards_train/rejected": -7.849307537078857, + "step": 2021 + }, + { + "epoch": 0.57, + "learning_rate": 1.933218939597808e-07, + "loss": 0.4624, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -120.99148559570312, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -147.94808959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5991485118865967, + "rewards_train/margins": 1.0956604480743408, + "rewards_train/rejected": -3.6948089599609375, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -47.851356506347656, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -36.23930358886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5976356267929077, + "rewards_train/margins": 1.4294198751449585, + "rewards_train/rejected": -3.027055501937866, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -18.57271957397461, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -2.375, + "logps_train/rejected": -20.511201858520508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18022803962230682, + "rewards_train/margins": 1.9938482493162155, + "rewards_train/rejected": -1.8136202096939087, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -147.47723388671875, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -175.173583984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.647723376750946, + "rewards_train/margins": 0.36963504552841187, + "rewards_train/rejected": -1.017358422279358, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -34.93973922729492, + "logps_train/ref_chosen": -7.9375, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -46.179378509521484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.700223922729492, + "rewards_train/margins": -0.38228607177734375, + "rewards_train/rejected": -2.3179378509521484, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -49.61823272705078, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -63.175132751464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5368232727050781, + "rewards_train/margins": 3.3431901931762695, + "rewards_train/rejected": -4.880013465881348, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -210.3315887451172, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -161.55499267578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.033158779144287, + "rewards_train/margins": -1.5776596069335938, + "rewards_train/rejected": -4.455499172210693, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -15.301424980163574, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -1.7890625, + "logps_train/rejected": -17.837305068969727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1410800218582153, + "rewards_train/margins": 0.46374428272247314, + "rewards_train/rejected": -1.6048243045806885, + "step": 2022 + }, + { + "epoch": 0.57, + "logps_train/chosen": -25.622196197509766, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -31.562238693237305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5997196435928345, + "rewards_train/margins": 1.2033792734146118, + "rewards_train/rejected": -2.8030989170074463, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -14.571878433227539, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -19.89035987854004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0259379148483276, + "rewards_train/margins": 0.14434814453125, + "rewards_train/rejected": -1.1702860593795776, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -101.68523406982422, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -168.5985870361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6185234785079956, + "rewards_train/margins": 5.591335415840149, + "rewards_train/rejected": -7.2098588943481445, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -14.88823127746582, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -34.33787536621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35757312178611755, + "rewards_train/margins": 1.8199645578861237, + "rewards_train/rejected": -2.177537679672241, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -145.02291870117188, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -245.593017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0522918701171875, + "rewards_train/margins": 4.707009792327881, + "rewards_train/rejected": -7.759301662445068, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -16.31108283996582, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -30.499977111816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6998583078384399, + "rewards_train/margins": 1.1938893795013428, + "rewards_train/rejected": -1.8937476873397827, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -134.73585510253906, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -191.41635131835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3235855102539062, + "rewards_train/margins": 3.7180495262145996, + "rewards_train/rejected": -7.041635036468506, + "step": 2023 + }, + { + "epoch": 0.57, + "logps_train/chosen": -91.54080200195312, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -123.03157043457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8040802478790283, + "rewards_train/margins": 2.549076795578003, + "rewards_train/rejected": -4.353157043457031, + "step": 2023 + }, + { + "epoch": 0.57, + "learning_rate": 1.9176122132042817e-07, + "loss": 0.36, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -198.54563903808594, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -271.29058837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.454564094543457, + "rewards_train/margins": 4.374494552612305, + "rewards_train/rejected": -9.829058647155762, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -2.9293718338012695, + "logps_train/ref_chosen": -1.875, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -26.853971481323242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10543718189001083, + "rewards_train/margins": 2.198710061609745, + "rewards_train/rejected": -2.304147243499756, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -96.19313049316406, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -114.75931549072266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0693130493164062, + "rewards_train/margins": 0.5066184997558594, + "rewards_train/rejected": -1.5759315490722656, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -1.0088956356048584, + "logps_train/ref_chosen": -1.546875, + "logps_train/ref_rejected": -1.296875, + "logps_train/rejected": -1.2611829042434692, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05379793792963028, + "rewards_train/margins": 0.05022872821427882, + "rewards_train/rejected": 0.0035692097153514624, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -94.79707336425781, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -211.32281494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.079707384109497, + "rewards_train/margins": 8.602574110031128, + "rewards_train/rejected": -10.682281494140625, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -20.824676513671875, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -38.696651458740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9012176394462585, + "rewards_train/margins": 1.7309475541114807, + "rewards_train/rejected": -2.6321651935577393, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -168.34373474121094, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -187.79254150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2343734502792358, + "rewards_train/margins": 2.1448806524276733, + "rewards_train/rejected": -3.379254102706909, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -196.12420654296875, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -196.0, + "logps_train/rejected": -354.4075012207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.912420630455017, + "rewards_train/margins": 13.928330063819885, + "rewards_train/rejected": -15.840750694274902, + "step": 2024 + }, + { + "epoch": 0.57, + "logps_train/chosen": -169.0109100341797, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -224.66928100585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.851090908050537, + "rewards_train/margins": 6.16583776473999, + "rewards_train/rejected": -11.016928672790527, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -6.225338459014893, + "logps_train/ref_chosen": -1.5859375, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -29.672677993774414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4639401137828827, + "rewards_train/margins": 1.6220777332782745, + "rewards_train/rejected": -2.0860178470611572, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -25.360837936401367, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -20.07020378112793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47358378767967224, + "rewards_train/margins": 0.8709366619586945, + "rewards_train/rejected": -1.3445204496383667, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -149.42080688476562, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -227.37338256835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1420806646347046, + "rewards_train/margins": 7.9952579736709595, + "rewards_train/rejected": -9.137338638305664, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -27.53272247314453, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -31.280229568481445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08422775566577911, + "rewards_train/margins": 2.262250855565071, + "rewards_train/rejected": -2.178023099899292, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -93.51460266113281, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -107.88484191894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6014602184295654, + "rewards_train/margins": 3.2870242595672607, + "rewards_train/rejected": -5.888484477996826, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -41.20441436767578, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -111.02149963378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8704414367675781, + "rewards_train/margins": 2.731708526611328, + "rewards_train/rejected": -3.6021499633789062, + "step": 2025 + }, + { + "epoch": 0.57, + "logps_train/chosen": -19.84624481201172, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -19.25550651550293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6658744812011719, + "rewards_train/margins": 0.7253011465072632, + "rewards_train/rejected": -1.391175627708435, + "step": 2025 + }, + { + "epoch": 0.57, + "learning_rate": 1.902062054955982e-07, + "loss": 0.1661, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -183.54861450195312, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -209.00588989257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.804862022399902, + "rewards_train/margins": 0.39572715759277344, + "rewards_train/rejected": -9.200589179992676, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -113.53449249267578, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -113.51165771484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3534492254257202, + "rewards_train/margins": -0.002283453941345215, + "rewards_train/rejected": -1.351165771484375, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -178.763916015625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -244.38690185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6763916015625, + "rewards_train/margins": 8.962298393249512, + "rewards_train/rejected": -12.638689994812012, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -15.649669647216797, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -17.09763526916504, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8649669885635376, + "rewards_train/margins": -0.617703452706337, + "rewards_train/rejected": -0.24726353585720062, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -54.794063568115234, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -45.19482421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.079406261444092, + "rewards_train/margins": -0.5599238872528076, + "rewards_train/rejected": -3.519482374191284, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.793624877929688, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -32.75150680541992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2168624848127365, + "rewards_train/margins": 1.3332882672548294, + "rewards_train/rejected": -1.550150752067566, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -98.32015991210938, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -109.05067443847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6820160150527954, + "rewards_train/margins": 5.548051714897156, + "rewards_train/rejected": -6.230067729949951, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -101.06997680664062, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -57.12821960449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1569976806640625, + "rewards_train/margins": 1.6808242797851562, + "rewards_train/rejected": -1.8378219604492188, + "step": 2026 + }, + { + "epoch": 0.57, + "logps_train/chosen": -6.4251627922058105, + "logps_train/ref_chosen": -1.75, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -17.343345642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4675162732601166, + "rewards_train/margins": 0.8605683147907257, + "rewards_train/rejected": -1.3280845880508423, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -8.714494705200195, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -16.267234802246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03394947201013565, + "rewards_train/margins": 0.8521490320563316, + "rewards_train/rejected": -0.8860985040664673, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -116.53346252441406, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -123.53482818603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.653346300125122, + "rewards_train/margins": 0.5501365661621094, + "rewards_train/rejected": -3.2034828662872314, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -13.51994800567627, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -49.510223388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1519948244094849, + "rewards_train/margins": 1.4990276098251343, + "rewards_train/rejected": -2.651022434234619, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -141.11099243164062, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -233.53671264648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5610992908477783, + "rewards_train/margins": 5.692572355270386, + "rewards_train/rejected": -9.253671646118164, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -5.191293239593506, + "logps_train/ref_chosen": -1.1796875, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -12.643808364868164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4011605679988861, + "rewards_train/margins": 0.4319702684879303, + "rewards_train/rejected": -0.8331308364868164, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -29.546337127685547, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -3.484375, + "logps_train/rejected": -24.408588409423828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5796337127685547, + "rewards_train/margins": -0.4872124195098877, + "rewards_train/rejected": -2.092421293258667, + "step": 2027 + }, + { + "epoch": 0.57, + "logps_train/chosen": -13.746201515197754, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -6.8371901512146, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9683701395988464, + "rewards_train/margins": -0.9940261244773865, + "rewards_train/rejected": 0.02565598487854004, + "step": 2027 + }, + { + "epoch": 0.57, + "learning_rate": 1.8865685736875357e-07, + "loss": 0.4889, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -157.1875, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -181.23561096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3187501430511475, + "rewards_train/margins": 1.2048110961914062, + "rewards_train/rejected": -3.5235612392425537, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -50.96870040893555, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -61.13035202026367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2718700170516968, + "rewards_train/margins": 1.241165280342102, + "rewards_train/rejected": -2.513035297393799, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -234.4485626220703, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -231.16131591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.244856357574463, + "rewards_train/margins": 2.571275234222412, + "rewards_train/rejected": -5.816131591796875, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -12.274816513061523, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -35.42446517944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27748164534568787, + "rewards_train/margins": 2.5930899679660797, + "rewards_train/rejected": -2.8705716133117676, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -200.37142944335938, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -290.0813903808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.437142848968506, + "rewards_train/margins": 6.770996570587158, + "rewards_train/rejected": -12.208139419555664, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -92.60643005371094, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -100.3640365600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1893569976091385, + "rewards_train/margins": 3.725760653614998, + "rewards_train/rejected": -3.5364036560058594, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -208.6664581298828, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -204.08096313476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.6666460037231445, + "rewards_train/margins": -0.05854940414428711, + "rewards_train/rejected": -5.608096599578857, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -61.53395080566406, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -27.58199119567871, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7533950805664062, + "rewards_train/margins": -0.9701958894729614, + "rewards_train/rejected": -1.7831991910934448, + "step": 2028 + }, + { + "epoch": 0.57, + "logps_train/chosen": -20.48979377746582, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -21.01547622680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6489793658256531, + "rewards_train/margins": 1.043193280696869, + "rewards_train/rejected": -1.692172646522522, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -15.685502052307129, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -37.64223861694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06230020523071289, + "rewards_train/margins": 2.8644237518310547, + "rewards_train/rejected": -2.9267239570617676, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -185.8028564453125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -200.69326782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.280285835266113, + "rewards_train/margins": 4.639040946960449, + "rewards_train/rejected": -8.919326782226562, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.918682098388672, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -14.214445114135742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.719993233680725, + "rewards_train/margins": -0.7704237103462219, + "rewards_train/rejected": -0.9495695233345032, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -7.143819808959961, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -9.59057331085205, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13000698387622833, + "rewards_train/margins": 0.30405034124851227, + "rewards_train/rejected": -0.4340573251247406, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -186.82147216796875, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -216.01377868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.532147407531738, + "rewards_train/margins": 3.519230842590332, + "rewards_train/rejected": -11.05137825012207, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -128.4940643310547, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -163.3003387451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2994065284729004, + "rewards_train/margins": 5.730627536773682, + "rewards_train/rejected": -8.030034065246582, + "step": 2029 + }, + { + "epoch": 0.57, + "logps_train/chosen": -6.437018871307373, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -16.89906120300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.00932688731700182, + "rewards_train/margins": 0.3305792389437556, + "rewards_train/rejected": -0.33990612626075745, + "step": 2029 + }, + { + "epoch": 0.57, + "learning_rate": 1.8711318778368789e-07, + "loss": 0.3341, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -130.61090087890625, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -81.74214935302734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.761090040206909, + "rewards_train/margins": -2.036875069141388, + "rewards_train/rejected": -0.7242149710655212, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -147.40139770507812, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -168.59771728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9401397705078125, + "rewards_train/margins": 2.819632053375244, + "rewards_train/rejected": -3.7597718238830566, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -22.18244171142578, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -22.696094512939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1807441711425781, + "rewards_train/margins": 0.08261525630950928, + "rewards_train/rejected": -1.2633594274520874, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -185.34390258789062, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -199.07749938964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.034390449523926, + "rewards_train/margins": 2.1233596801757812, + "rewards_train/rejected": -9.157750129699707, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -208.46728515625, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -172.57345581054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.146728515625, + "rewards_train/margins": -4.189382791519165, + "rewards_train/rejected": -3.957345724105835, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -39.923099517822266, + "logps_train/ref_chosen": -14.0, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -52.191368103027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5923099517822266, + "rewards_train/margins": 0.6768269538879395, + "rewards_train/rejected": -3.269136905670166, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -59.429691314697266, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -138.90542602539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.367969274520874, + "rewards_train/margins": -0.3274266719818115, + "rewards_train/rejected": -2.0405426025390625, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.82840919494629, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -20.788166046142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1015909910202026, + "rewards_train/margins": 0.2334756851196289, + "rewards_train/rejected": -1.3350666761398315, + "step": 2030 + }, + { + "epoch": 0.57, + "logps_train/chosen": -126.78447723388672, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -185.2561492919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8784477710723877, + "rewards_train/margins": 4.097167253494263, + "rewards_train/rejected": -5.97561502456665, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -28.057212829589844, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -25.014869689941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48072129487991333, + "rewards_train/margins": 0.5395156741142273, + "rewards_train/rejected": -1.0202369689941406, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -17.594642639160156, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -46.150978088378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4094642400741577, + "rewards_train/margins": 1.843133568763733, + "rewards_train/rejected": -3.2525978088378906, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -30.897891998291016, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -61.02750778198242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6022891998291016, + "rewards_train/margins": 1.7504615783691406, + "rewards_train/rejected": -2.352750778198242, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -98.25399780273438, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -127.09789276123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8253997564315796, + "rewards_train/margins": 5.334389805793762, + "rewards_train/rejected": -7.159789562225342, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -13.721476554870605, + "logps_train/ref_chosen": -3.328125, + "logps_train/ref_rejected": -2.546875, + "logps_train/rejected": -17.991424560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0393351316452026, + "rewards_train/margins": 0.505119800567627, + "rewards_train/rejected": -1.5444549322128296, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -18.969280242919922, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -22.695045471191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3844280242919922, + "rewards_train/margins": -0.38367342948913574, + "rewards_train/rejected": -1.0007545948028564, + "step": 2031 + }, + { + "epoch": 0.57, + "logps_train/chosen": -28.30828285217285, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -19.615554809570312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9183282852172852, + "rewards_train/margins": -0.4317728281021118, + "rewards_train/rejected": -1.4865554571151733, + "step": 2031 + }, + { + "epoch": 0.57, + "learning_rate": 1.8557520754445221e-07, + "loss": 0.7601, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -27.194110870361328, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -38.803016662597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8444111347198486, + "rewards_train/margins": 1.2983906269073486, + "rewards_train/rejected": -3.1428017616271973, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -1.3397974967956543, + "logps_train/ref_chosen": -0.255859375, + "logps_train/ref_rejected": -1.65625, + "logps_train/rejected": -4.338118076324463, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10839381068944931, + "rewards_train/margins": 0.15979299694299698, + "rewards_train/rejected": -0.2681868076324463, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -20.68707275390625, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -23.561786651611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.637457251548767, + "rewards_train/margins": 0.25309646129608154, + "rewards_train/rejected": -1.8905537128448486, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -206.9007568359375, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -153.55361938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.090075969696045, + "rewards_train/margins": 0.11528587341308594, + "rewards_train/rejected": -4.205361843109131, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -145.87078857421875, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -177.81570434570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8870790004730225, + "rewards_train/margins": 3.0444915294647217, + "rewards_train/rejected": -5.931570529937744, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -29.141603469848633, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -41.162139892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9766603708267212, + "rewards_train/margins": 0.9020535945892334, + "rewards_train/rejected": -1.8787139654159546, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -126.33711242675781, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -219.84671020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7337112426757812, + "rewards_train/margins": 2.8509597778320312, + "rewards_train/rejected": -3.5846710205078125, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -39.691165924072266, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -89.99661254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7691165804862976, + "rewards_train/margins": 2.555544674396515, + "rewards_train/rejected": -3.3246612548828125, + "step": 2032 + }, + { + "epoch": 0.57, + "logps_train/chosen": -176.33792114257812, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -220.703857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.983792304992676, + "rewards_train/margins": 4.386593818664551, + "rewards_train/rejected": -10.370386123657227, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -162.02896118164062, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -222.38470458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.7028961181640625, + "rewards_train/margins": 2.435574531555176, + "rewards_train/rejected": -8.138470649719238, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -83.18273162841797, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -85.05526733398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08172684162855148, + "rewards_train/margins": 0.9872535988688469, + "rewards_train/rejected": -0.9055267572402954, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -47.54847717285156, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -59.72011184692383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9548476934432983, + "rewards_train/margins": 1.5421634912490845, + "rewards_train/rejected": -3.497011184692383, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -43.034942626953125, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -48.64667510986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.04099440574646, + "rewards_train/margins": 0.611173152923584, + "rewards_train/rejected": -2.652167558670044, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -57.11392593383789, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -81.79193115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7863926291465759, + "rewards_train/margins": 0.09280049800872803, + "rewards_train/rejected": -0.879193127155304, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -46.954158782958984, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -143.8614501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4829158782958984, + "rewards_train/margins": 3.85322904586792, + "rewards_train/rejected": -6.336144924163818, + "step": 2033 + }, + { + "epoch": 0.57, + "logps_train/chosen": -96.03607177734375, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -143.57968139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.153607130050659, + "rewards_train/margins": 1.6043610572814941, + "rewards_train/rejected": -3.7579681873321533, + "step": 2033 + }, + { + "epoch": 0.57, + "learning_rate": 1.8404292741527715e-07, + "loss": 0.28, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -263.94512939453125, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -235.11746215820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.094512939453125, + "rewards_train/margins": -0.08276653289794922, + "rewards_train/rejected": -7.011746406555176, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -183.83517456054688, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -206.4407958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.983517646789551, + "rewards_train/margins": 2.160562038421631, + "rewards_train/rejected": -7.144079685211182, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -156.88607788085938, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -234.42987060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.188607692718506, + "rewards_train/margins": 5.154379367828369, + "rewards_train/rejected": -9.342987060546875, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -191.70065307617188, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -281.2646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.870065212249756, + "rewards_train/margins": 7.056399822235107, + "rewards_train/rejected": -12.926465034484863, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -29.29447364807129, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -48.27550506591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.066947340965271, + "rewards_train/margins": 1.6231032609939575, + "rewards_train/rejected": -2.6900506019592285, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -139.41256713867188, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -219.97317504882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.091256856918335, + "rewards_train/margins": 8.056061029434204, + "rewards_train/rejected": -11.147317886352539, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -108.22964477539062, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -131.94113159179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0229644775390625, + "rewards_train/margins": 1.521148681640625, + "rewards_train/rejected": -3.5441131591796875, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -26.68246078491211, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -39.1316032409668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.018246054649353, + "rewards_train/margins": 1.1824144124984741, + "rewards_train/rejected": -2.200660467147827, + "step": 2034 + }, + { + "epoch": 0.57, + "logps_train/chosen": -12.858171463012695, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -68.61618041992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1631609201431274, + "rewards_train/margins": 2.88595712184906, + "rewards_train/rejected": -4.0491180419921875, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -5.54240608215332, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -27.03817367553711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06763439625501633, + "rewards_train/margins": 1.9527017399668694, + "rewards_train/rejected": -1.885067343711853, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -55.57358932495117, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -79.65909576416016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.232358932495117, + "rewards_train/margins": -0.7664493322372437, + "rewards_train/rejected": -1.4659096002578735, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -44.820335388183594, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -46.226627349853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1570335626602173, + "rewards_train/margins": 1.978129267692566, + "rewards_train/rejected": -3.135162830352783, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -270.1530456542969, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -253.89108276367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.715304851531982, + "rewards_train/margins": -0.12619638442993164, + "rewards_train/rejected": -7.589108467102051, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -147.0350341796875, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -229.03652954101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.553503513336182, + "rewards_train/margins": 4.550149440765381, + "rewards_train/rejected": -9.103652954101562, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -147.31683349609375, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -138.3834228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.931683540344238, + "rewards_train/margins": 0.7066588401794434, + "rewards_train/rejected": -5.638342380523682, + "step": 2035 + }, + { + "epoch": 0.57, + "logps_train/chosen": -23.66619873046875, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -35.15476608276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.410369873046875, + "rewards_train/margins": 1.2676067352294922, + "rewards_train/rejected": -2.677976608276367, + "step": 2035 + }, + { + "epoch": 0.57, + "learning_rate": 1.8251635812049893e-07, + "loss": 0.2737, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -11.28857135772705, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -26.421367645263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2601071298122406, + "rewards_train/margins": 1.4695296585559845, + "rewards_train/rejected": -1.729636788368225, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -44.013370513916016, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -62.866905212402344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.788837194442749, + "rewards_train/margins": -1.8771466612815857, + "rewards_train/rejected": -0.9116905331611633, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -45.1198616027832, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -33.68246841430664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2369861602783203, + "rewards_train/margins": -0.03123927116394043, + "rewards_train/rejected": -1.2057468891143799, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -20.90475845336914, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -16.50165367126465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2092258930206299, + "rewards_train/margins": 0.21593952178955078, + "rewards_train/rejected": -1.4251654148101807, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -226.87118530273438, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -294.61865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.287118911743164, + "rewards_train/margins": 6.674746513366699, + "rewards_train/rejected": -14.961865425109863, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -137.54489135742188, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -131.042236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0044891834259033, + "rewards_train/margins": 2.74973464012146, + "rewards_train/rejected": -4.754223823547363, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -7.796229362487793, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -7.606714725494385, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19524793326854706, + "rewards_train/margins": -0.01895146071910858, + "rewards_train/rejected": -0.17629647254943848, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -256.173095703125, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -279.27618408203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.817309379577637, + "rewards_train/margins": -0.6896905899047852, + "rewards_train/rejected": -10.127618789672852, + "step": 2036 + }, + { + "epoch": 0.57, + "logps_train/chosen": -157.04510498046875, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -203.55316162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.754510402679443, + "rewards_train/margins": 1.8008055686950684, + "rewards_train/rejected": -8.555315971374512, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -147.44142150878906, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -227.5172882080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2441422939300537, + "rewards_train/margins": 7.307586908340454, + "rewards_train/rejected": -9.551729202270508, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -52.62697219848633, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -52.37923049926758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2126972675323486, + "rewards_train/margins": -0.24977421760559082, + "rewards_train/rejected": -0.9629230499267578, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -9.564193725585938, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -15.564699172973633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03141937404870987, + "rewards_train/margins": 0.6188005432486534, + "rewards_train/rejected": -0.6502199172973633, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -0.090231753885746, + "logps_train/ref_chosen": -0.126953125, + "logps_train/ref_rejected": -0.126953125, + "logps_train/rejected": -0.09259075671434402, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0036721371579915285, + "rewards_train/margins": 0.00023590028285980225, + "rewards_train/rejected": 0.0034362368751317263, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -46.91371154785156, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -48.05268096923828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1038711071014404, + "rewards_train/margins": -0.023602962493896484, + "rewards_train/rejected": -2.080268144607544, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -175.09718322753906, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -186.366455078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.109718322753906, + "rewards_train/margins": 4.726927757263184, + "rewards_train/rejected": -8.83664608001709, + "step": 2037 + }, + { + "epoch": 0.57, + "logps_train/chosen": -44.64126205444336, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -36.381370544433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.864126205444336, + "rewards_train/margins": -0.05723905563354492, + "rewards_train/rejected": -2.806887149810791, + "step": 2037 + }, + { + "epoch": 0.57, + "learning_rate": 1.8099551034448458e-07, + "loss": 0.5573, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -312.7674865722656, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -273.90875244140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.376749038696289, + "rewards_train/margins": -0.7858734130859375, + "rewards_train/rejected": -12.590875625610352, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -106.43608093261719, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -225.00204467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.243608236312866, + "rewards_train/margins": 7.656596422195435, + "rewards_train/rejected": -10.9002046585083, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -78.55956268310547, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -172.7225341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.155956268310547, + "rewards_train/margins": 3.166297435760498, + "rewards_train/rejected": -5.322253704071045, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -30.071121215820312, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -33.94193649291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5946121215820312, + "rewards_train/margins": 0.04958152770996094, + "rewards_train/rejected": -0.6441936492919922, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -27.43868637084961, + "logps_train/ref_chosen": -2.296875, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -77.81683349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.514181137084961, + "rewards_train/margins": 1.2675023078918457, + "rewards_train/rejected": -3.7816834449768066, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -204.10260009765625, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -225.1429443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.810260057449341, + "rewards_train/margins": 3.904034376144409, + "rewards_train/rejected": -7.71429443359375, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -129.87045288085938, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -142.18753051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.337045431137085, + "rewards_train/margins": 2.6317079067230225, + "rewards_train/rejected": -4.968753337860107, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -37.03606033325195, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -54.5806884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4661060571670532, + "rewards_train/margins": 2.3794628381729126, + "rewards_train/rejected": -3.845568895339966, + "step": 2038 + }, + { + "epoch": 0.57, + "logps_train/chosen": -57.143310546875, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -203.97637939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8393310904502869, + "rewards_train/margins": 10.658306658267975, + "rewards_train/rejected": -11.497637748718262, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -88.13055419921875, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -215.6078338623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0630555152893066, + "rewards_train/margins": 8.997727870941162, + "rewards_train/rejected": -11.060783386230469, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -13.770705223083496, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -8.805265426635742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9926955103874207, + "rewards_train/margins": -0.509043961763382, + "rewards_train/rejected": -0.4836515486240387, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -2.0830929279327393, + "logps_train/ref_chosen": -0.53125, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -15.570443153381348, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1551842987537384, + "rewards_train/margins": 0.5393600165843964, + "rewards_train/rejected": -0.6945443153381348, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -93.17091369628906, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -89.79589080810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.192091464996338, + "rewards_train/margins": 0.48749780654907227, + "rewards_train/rejected": -5.67958927154541, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -80.45825958251953, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -101.23735046386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4458259642124176, + "rewards_train/margins": 2.477909177541733, + "rewards_train/rejected": -2.9237351417541504, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -71.53190612792969, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -186.27142333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40319061279296875, + "rewards_train/margins": 5.973951816558838, + "rewards_train/rejected": -6.377142429351807, + "step": 2039 + }, + { + "epoch": 0.57, + "logps_train/chosen": -111.86278533935547, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -50.45793914794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7862786054611206, + "rewards_train/margins": 0.9595154523849487, + "rewards_train/rejected": -2.7457940578460693, + "step": 2039 + }, + { + "epoch": 0.57, + "learning_rate": 1.794803947315555e-07, + "loss": 0.2891, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -31.352785110473633, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -27.62346076965332, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.885278582572937, + "rewards_train/margins": -0.01043248176574707, + "rewards_train/rejected": -1.87484610080719, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -129.82069396972656, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -235.18690490722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6320693492889404, + "rewards_train/margins": 6.1866209506988525, + "rewards_train/rejected": -9.818690299987793, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -200.63790893554688, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -161.05804443359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.063790798187256, + "rewards_train/margins": -1.4579863548278809, + "rewards_train/rejected": -3.605804443359375, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -74.0067138671875, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -104.86688995361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.37567138671875, + "rewards_train/margins": 0.911017656326294, + "rewards_train/rejected": -2.286689043045044, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -98.1240463256836, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -135.03305053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36240464448928833, + "rewards_train/margins": 1.990900456905365, + "rewards_train/rejected": -2.3533051013946533, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -18.12877082824707, + "logps_train/ref_chosen": -2.796875, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -9.60263442993164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5331896543502808, + "rewards_train/margins": -0.9588636755943298, + "rewards_train/rejected": -0.5743259787559509, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -38.59103775024414, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -23.395544052124023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.30285382270813, + "rewards_train/margins": -0.7570494413375854, + "rewards_train/rejected": -1.5458043813705444, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -9.846860885620117, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -9.837793350219727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07843609154224396, + "rewards_train/margins": -0.0009067580103874207, + "rewards_train/rejected": -0.07752933353185654, + "step": 2040 + }, + { + "epoch": 0.57, + "logps_train/chosen": -83.31489562988281, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -219.0, + "logps_train/rejected": -263.5380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.131489634513855, + "rewards_train/margins": 3.3223191499710083, + "rewards_train/rejected": -4.453808784484863, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -65.3360824584961, + "logps_train/ref_chosen": -55.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -91.48516845703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9836082458496094, + "rewards_train/margins": -0.4850913882255554, + "rewards_train/rejected": -0.49851685762405396, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -316.0397644042969, + "logps_train/ref_chosen": -264.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -231.76429748535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.203976631164551, + "rewards_train/margins": 3.572453498840332, + "rewards_train/rejected": -8.776430130004883, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -271.0882568359375, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -220.57809448242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.908825874328613, + "rewards_train/margins": -1.9510164260864258, + "rewards_train/rejected": -8.957809448242188, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -11.385570526123047, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -24.747295379638672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7166820764541626, + "rewards_train/margins": 1.3236724138259888, + "rewards_train/rejected": -2.0403544902801514, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -9.079170227050781, + "logps_train/ref_chosen": -1.0078125, + "logps_train/ref_rejected": -1.0078125, + "logps_train/rejected": -8.613608360290527, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8071357607841492, + "rewards_train/margins": -0.046556174755096436, + "rewards_train/rejected": -0.7605795860290527, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -64.75623321533203, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -59.39809799194336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.369373321533203, + "rewards_train/margins": -0.40456342697143555, + "rewards_train/rejected": -4.964809894561768, + "step": 2041 + }, + { + "epoch": 0.57, + "logps_train/chosen": -13.666584014892578, + "logps_train/ref_chosen": -1.0859375, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -20.197860717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2580646276474, + "rewards_train/margins": 0.2679715156555176, + "rewards_train/rejected": -1.5260361433029175, + "step": 2041 + }, + { + "epoch": 0.57, + "learning_rate": 1.7797102188591506e-07, + "loss": 0.7188, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -28.888099670410156, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -53.73225402832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7263100147247314, + "rewards_train/margins": 1.4219155311584473, + "rewards_train/rejected": -3.1482255458831787, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -137.66412353515625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -190.1995849609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.216412305831909, + "rewards_train/margins": 1.103546142578125, + "rewards_train/rejected": -3.319958448410034, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -9.342519760131836, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -24.949247360229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34675198793411255, + "rewards_train/margins": 1.4700477719306946, + "rewards_train/rejected": -1.8167997598648071, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -12.872535705566406, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -31.032440185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4247535765171051, + "rewards_train/margins": 1.7472403943538666, + "rewards_train/rejected": -2.1719939708709717, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -23.144521713256836, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -46.2197265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7082022428512573, + "rewards_train/margins": -0.061229586601257324, + "rewards_train/rejected": -1.64697265625, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -100.87095642089844, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -154.72018432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1870956420898438, + "rewards_train/margins": 2.384922981262207, + "rewards_train/rejected": -4.572018623352051, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -201.12237548828125, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -215.10345458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.512237548828125, + "rewards_train/margins": 4.24810791015625, + "rewards_train/rejected": -9.760345458984375, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -169.29437255859375, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -194.41836547851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.879437446594238, + "rewards_train/margins": 0.5623993873596191, + "rewards_train/rejected": -5.441836833953857, + "step": 2042 + }, + { + "epoch": 0.57, + "logps_train/chosen": -3.0048201084136963, + "logps_train/ref_chosen": -0.458984375, + "logps_train/ref_rejected": -0.458984375, + "logps_train/rejected": -3.0099425315856934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25458356738090515, + "rewards_train/margins": 0.000512242317199707, + "rewards_train/rejected": -0.25509580969810486, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -56.917449951171875, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -56.43471145629883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.454245090484619, + "rewards_train/margins": -0.04827404022216797, + "rewards_train/rejected": -4.405971050262451, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -52.86258316040039, + "logps_train/ref_chosen": -23.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -200.12269592285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.936258316040039, + "rewards_train/margins": 6.5260114669799805, + "rewards_train/rejected": -9.46226978302002, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -60.79954528808594, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -56.34625244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1549545526504517, + "rewards_train/margins": 1.579670786857605, + "rewards_train/rejected": -2.7346253395080566, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -62.932247161865234, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -76.47069549560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5932247638702393, + "rewards_train/margins": -0.021155238151550293, + "rewards_train/rejected": -1.572069525718689, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -17.698501586914062, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -28.852506637573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5136001706123352, + "rewards_train/margins": 1.631025493144989, + "rewards_train/rejected": -2.144625663757324, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -72.94497680664062, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -82.48153686523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8194977045059204, + "rewards_train/margins": 1.553655982017517, + "rewards_train/rejected": -3.3731536865234375, + "step": 2043 + }, + { + "epoch": 0.57, + "logps_train/chosen": -81.52346801757812, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -53.1156005859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6523468494415283, + "rewards_train/margins": -0.4657866954803467, + "rewards_train/rejected": -3.1865601539611816, + "step": 2043 + }, + { + "epoch": 0.57, + "learning_rate": 1.7646740237157254e-07, + "loss": 0.3611, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -196.43606567382812, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -214.31918334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.093606472015381, + "rewards_train/margins": 4.18831205368042, + "rewards_train/rejected": -11.2819185256958, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -5.195126056671143, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -26.06972885131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22888760268688202, + "rewards_train/margins": 1.790585234761238, + "rewards_train/rejected": -2.01947283744812, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -176.171630859375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -184.62106323242188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.617163181304932, + "rewards_train/margins": -0.3550567626953125, + "rewards_train/rejected": -4.262106418609619, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -25.311931610107422, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -51.85723114013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.078068256378174, + "rewards_train/margins": 2.698279857635498, + "rewards_train/rejected": -4.776348114013672, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -158.7962188720703, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -236.6444854736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0796220302581787, + "rewards_train/margins": 7.084826707839966, + "rewards_train/rejected": -10.164448738098145, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -158.57937622070312, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -160.07489013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.357937812805176, + "rewards_train/margins": 1.0495514869689941, + "rewards_train/rejected": -6.40748929977417, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -115.02766418457031, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -194.7994842529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.652766704559326, + "rewards_train/margins": 2.327181816101074, + "rewards_train/rejected": -6.9799485206604, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -20.62983512878418, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -39.89861297607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5567335486412048, + "rewards_train/margins": 2.308127701282501, + "rewards_train/rejected": -2.864861249923706, + "step": 2044 + }, + { + "epoch": 0.57, + "logps_train/chosen": -163.74798583984375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -221.86077880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.574798583984375, + "rewards_train/margins": 5.211279392242432, + "rewards_train/rejected": -7.786077976226807, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -70.92543029785156, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -3.140625, + "logps_train/rejected": -33.35867691040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.142543077468872, + "rewards_train/margins": 0.8792622089385986, + "rewards_train/rejected": -3.0218052864074707, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -21.626575469970703, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -16.43662452697754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7251575589179993, + "rewards_train/margins": 0.28412991762161255, + "rewards_train/rejected": -1.0092874765396118, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -160.70907592773438, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -201.59759521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.170907497406006, + "rewards_train/margins": 0.6888523101806641, + "rewards_train/rejected": -5.85975980758667, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -12.142309188842773, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -51.524986267089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8564184308052063, + "rewards_train/margins": 1.9460801482200623, + "rewards_train/rejected": -2.8024985790252686, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -38.859161376953125, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -36.01782989501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2234160900115967, + "rewards_train/margins": 0.8596169948577881, + "rewards_train/rejected": -3.0830330848693848, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -121.94805908203125, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -170.9881134033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.194805860519409, + "rewards_train/margins": 1.704005479812622, + "rewards_train/rejected": -4.898811340332031, + "step": 2045 + }, + { + "epoch": 0.57, + "logps_train/chosen": -28.48395347595215, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -122.82418060302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8733953833580017, + "rewards_train/margins": 2.659022629261017, + "rewards_train/rejected": -3.5324180126190186, + "step": 2045 + }, + { + "epoch": 0.57, + "learning_rate": 1.749695467122706e-07, + "loss": 0.2286, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -98.81636047363281, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -175.6342010498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.931636095046997, + "rewards_train/margins": 4.781784296035767, + "rewards_train/rejected": -7.713420391082764, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -62.688720703125, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -42.11281204223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.368872046470642, + "rewards_train/margins": 1.523659110069275, + "rewards_train/rejected": -2.892531156539917, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -88.13742065429688, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -155.54727172851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4137420654296875, + "rewards_train/margins": 2.240985155105591, + "rewards_train/rejected": -3.6547272205352783, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -123.01185607910156, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -178.18783569335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4011857509613037, + "rewards_train/margins": 4.817598104476929, + "rewards_train/rejected": -7.218783855438232, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -67.2187271118164, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -123.4079818725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2218727171421051, + "rewards_train/margins": 3.3689254224300385, + "rewards_train/rejected": -3.5907981395721436, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -196.1884765625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -283.43115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.418847560882568, + "rewards_train/margins": 6.624268054962158, + "rewards_train/rejected": -12.043115615844727, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -23.87700653076172, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -41.85462951660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1127007007598877, + "rewards_train/margins": 0.14776229858398438, + "rewards_train/rejected": -1.260462999343872, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -172.61349487304688, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -191.42074584960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9613494873046875, + "rewards_train/margins": 3.2807250022888184, + "rewards_train/rejected": -5.242074489593506, + "step": 2046 + }, + { + "epoch": 0.57, + "logps_train/chosen": -125.1222152709961, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -178.25503540039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6122215390205383, + "rewards_train/margins": 5.6132819056510925, + "rewards_train/rejected": -6.225503444671631, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -161.49429321289062, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -138.88748168945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6494293212890625, + "rewards_train/margins": 1.3393189907073975, + "rewards_train/rejected": -3.98874831199646, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -175.60186767578125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -176.75115966796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7601869106292725, + "rewards_train/margins": -0.28507089614868164, + "rewards_train/rejected": -3.475116014480591, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -126.92819213867188, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -122.34408569335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0428192615509033, + "rewards_train/margins": 1.3915894031524658, + "rewards_train/rejected": -3.434408664703369, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -16.681718826293945, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -19.775516510009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35567188262939453, + "rewards_train/margins": 0.6218797564506531, + "rewards_train/rejected": -0.9775516390800476, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -10.737181663513184, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -39.37259292602539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1387818306684494, + "rewards_train/margins": 2.0510411709547043, + "rewards_train/rejected": -1.9122593402862549, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -259.287109375, + "logps_train/ref_chosen": -227.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -223.9644775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.228710889816284, + "rewards_train/margins": 5.467737436294556, + "rewards_train/rejected": -8.69644832611084, + "step": 2047 + }, + { + "epoch": 0.57, + "logps_train/chosen": -15.636259078979492, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -0.87890625, + "logps_train/rejected": -4.087626934051514, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.28862592577934265, + "rewards_train/margins": 0.032246142625808716, + "rewards_train/rejected": -0.32087206840515137, + "step": 2047 + }, + { + "epoch": 0.57, + "learning_rate": 1.7347746539141083e-07, + "loss": 0.2216, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -14.120368003845215, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -11.528390884399414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.455786794424057, + "rewards_train/margins": -0.1091977059841156, + "rewards_train/rejected": -0.3465890884399414, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -99.48829650878906, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -69.24578857421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2988296747207642, + "rewards_train/margins": 0.40074920654296875, + "rewards_train/rejected": -1.699578881263733, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -125.0152359008789, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -187.20211791992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.251523494720459, + "rewards_train/margins": 2.31868839263916, + "rewards_train/rejected": -6.570211887359619, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -38.870635986328125, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -49.4492301940918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5058135986328125, + "rewards_train/margins": 1.929734706878662, + "rewards_train/rejected": -4.435548305511475, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -57.689918518066406, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -60.25, + "logps_train/rejected": -56.24943161010742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.08100815117359161, + "rewards_train/margins": -0.3190486878156662, + "rewards_train/rejected": 0.4000568389892578, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -183.38925170898438, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -242.08712768554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.438925266265869, + "rewards_train/margins": 4.169787406921387, + "rewards_train/rejected": -6.608712673187256, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -116.68077087402344, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -197.22950744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21807709336280823, + "rewards_train/margins": 3.704873651266098, + "rewards_train/rejected": -3.9229507446289062, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -30.495361328125, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -25.506973266601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9245361685752869, + "rewards_train/margins": 1.3730362057685852, + "rewards_train/rejected": -2.297572374343872, + "step": 2048 + }, + { + "epoch": 0.57, + "logps_train/chosen": -38.21293258666992, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -55.25025939941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4212932586669922, + "rewards_train/margins": 3.7412328720092773, + "rewards_train/rejected": -4.1625261306762695, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -109.7756118774414, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -191.06430053710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2275612354278564, + "rewards_train/margins": 5.178869009017944, + "rewards_train/rejected": -8.4064302444458, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.705074310302734, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -26.5, + "logps_train/rejected": -106.03179168701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0892574787139893, + "rewards_train/margins": 6.863921880722046, + "rewards_train/rejected": -7.953179359436035, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -16.42412567138672, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -31.862714767456055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5424125790596008, + "rewards_train/margins": 0.6688589453697205, + "rewards_train/rejected": -1.2112715244293213, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -23.174850463867188, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -31.879348754882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3799850940704346, + "rewards_train/margins": -0.5670502185821533, + "rewards_train/rejected": -0.8129348754882812, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -11.108753204345703, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -0.99609375, + "logps_train/rejected": -4.282591819763184, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8608753085136414, + "rewards_train/margins": -0.532225489616394, + "rewards_train/rejected": -0.3286498188972473, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -161.80972290039062, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -189.36984252929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4809722900390625, + "rewards_train/margins": 3.9060120582580566, + "rewards_train/rejected": -6.386984348297119, + "step": 2049 + }, + { + "epoch": 0.57, + "logps_train/chosen": -31.850576400756836, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -31.875, + "logps_train/rejected": -99.14608764648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9975576400756836, + "rewards_train/margins": 4.729551315307617, + "rewards_train/rejected": -6.727108955383301, + "step": 2049 + }, + { + "epoch": 0.57, + "learning_rate": 1.7199116885197996e-07, + "loss": 0.319, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -129.20877075195312, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -104.30269622802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7208771109580994, + "rewards_train/margins": 0.9593924880027771, + "rewards_train/rejected": -1.6802695989608765, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -56.64160919189453, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -92.2396469116211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.314160943031311, + "rewards_train/margins": 0.7598038911819458, + "rewards_train/rejected": -2.073964834213257, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -15.36679458618164, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -35.40287399291992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9710544943809509, + "rewards_train/margins": 0.33173292875289917, + "rewards_train/rejected": -1.30278742313385, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -116.4385986328125, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -213.98944091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.193859815597534, + "rewards_train/margins": 6.555084466934204, + "rewards_train/rejected": -9.748944282531738, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -141.7542266845703, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -228.14846801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4754228591918945, + "rewards_train/margins": 1.739424228668213, + "rewards_train/rejected": -6.214847087860107, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -132.7401123046875, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -132.60025024414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7740113735198975, + "rewards_train/margins": -0.013986349105834961, + "rewards_train/rejected": -3.7600250244140625, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -154.41998291015625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -203.38131713867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.341998338699341, + "rewards_train/margins": 2.296133279800415, + "rewards_train/rejected": -4.638131618499756, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -30.829307556152344, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -36.105472564697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6391807794570923, + "rewards_train/margins": 0.8651164770126343, + "rewards_train/rejected": -2.5042972564697266, + "step": 2050 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.704242706298828, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -39.36859130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8704242706298828, + "rewards_train/margins": 2.792997360229492, + "rewards_train/rejected": -3.663421630859375, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -70.20999145507812, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -13.586373329162598, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6959991455078125, + "rewards_train/margins": -0.506111815571785, + "rewards_train/rejected": -0.18988732993602753, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -14.767462730407715, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -28.588773727416992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0642462968826294, + "rewards_train/margins": 0.35088109970092773, + "rewards_train/rejected": -1.4151273965835571, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -131.41867065429688, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -131.2139434814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.091867208480835, + "rewards_train/margins": 1.779527187347412, + "rewards_train/rejected": -3.871394395828247, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.878631591796875, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -39.942466735839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1816132068634033, + "rewards_train/margins": 0.987633466720581, + "rewards_train/rejected": -2.1692466735839844, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -113.45696258544922, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -122.14360809326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.845696210861206, + "rewards_train/margins": 1.4686648845672607, + "rewards_train/rejected": -4.314361095428467, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -226.12857055664062, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -268.5586853027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.312857627868652, + "rewards_train/margins": -0.15698909759521484, + "rewards_train/rejected": -8.155868530273438, + "step": 2051 + }, + { + "epoch": 0.57, + "logps_train/chosen": -143.01869201660156, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -207.4055938720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.0518693923950195, + "rewards_train/margins": 5.438690185546875, + "rewards_train/rejected": -10.490559577941895, + "step": 2051 + }, + { + "epoch": 0.57, + "learning_rate": 1.7051066749647835e-07, + "loss": 0.3496, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -9.394074440002441, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -48.870697021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44878244400024414, + "rewards_train/margins": 3.525787353515625, + "rewards_train/rejected": -3.974569797515869, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -37.97960662841797, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -48.29802703857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7979607582092285, + "rewards_train/margins": 0.8943419456481934, + "rewards_train/rejected": -3.692302703857422, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -113.17225646972656, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -201.128173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.067225694656372, + "rewards_train/margins": 5.495591878890991, + "rewards_train/rejected": -8.562817573547363, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -94.62028503417969, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -208.35763549804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6120284795761108, + "rewards_train/margins": 6.5237356424331665, + "rewards_train/rejected": -8.135764122009277, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -21.567176818847656, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -56.46419143676758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8692177534103394, + "rewards_train/margins": 1.6022013425827026, + "rewards_train/rejected": -3.471419095993042, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -146.65374755859375, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -177.901123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.665374755859375, + "rewards_train/margins": 0.8747377395629883, + "rewards_train/rejected": -6.540112495422363, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -16.031185150146484, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -14.8125, + "logps_train/rejected": -22.65705108642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8937435150146484, + "rewards_train/margins": -0.10928839445114136, + "rewards_train/rejected": -0.7844551205635071, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -121.87889862060547, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -276.32269287109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7878898978233337, + "rewards_train/margins": 8.644379198551178, + "rewards_train/rejected": -9.432269096374512, + "step": 2052 + }, + { + "epoch": 0.57, + "logps_train/chosen": -132.72116088867188, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -132.70660400390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.272116184234619, + "rewards_train/margins": -0.0014557838439941406, + "rewards_train/rejected": -4.270660400390625, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -89.2799072265625, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -160.41439819335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.027990698814392, + "rewards_train/margins": 5.463449120521545, + "rewards_train/rejected": -6.4914398193359375, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -12.355398178100586, + "logps_train/ref_chosen": -1.125, + "logps_train/ref_rejected": -1.65625, + "logps_train/rejected": -4.114050388336182, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1230398416519165, + "rewards_train/margins": -0.8772598057985306, + "rewards_train/rejected": -0.24578003585338593, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -426.6939697265625, + "logps_train/ref_chosen": -320.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -157.46022033691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.669397354125977, + "rewards_train/margins": -4.97337532043457, + "rewards_train/rejected": -5.696022033691406, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -293.15679931640625, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -259.79327392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.115679740905762, + "rewards_train/margins": 1.1636476516723633, + "rewards_train/rejected": -12.279327392578125, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -93.33793640136719, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -133.31906127929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6837936639785767, + "rewards_train/margins": 3.9981125593185425, + "rewards_train/rejected": -5.681906223297119, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -14.646148681640625, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -37.65638732910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07711487263441086, + "rewards_train/margins": 2.926023907959461, + "rewards_train/rejected": -3.003138780593872, + "step": 2053 + }, + { + "epoch": 0.57, + "logps_train/chosen": -177.35671997070312, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -256.0, + "logps_train/rejected": -282.15740966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43567201495170593, + "rewards_train/margins": 2.180068999528885, + "rewards_train/rejected": -2.615741014480591, + "step": 2053 + }, + { + "epoch": 0.57, + "learning_rate": 1.6903597168684603e-07, + "loss": 0.5601, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -56.021026611328125, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -88.11385345458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3271026611328125, + "rewards_train/margins": 0.9842827320098877, + "rewards_train/rejected": -1.3113853931427002, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -156.84317016601562, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -167.93856811523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.0343170166015625, + "rewards_train/margins": 1.50954008102417, + "rewards_train/rejected": -7.543857097625732, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -136.6417236328125, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -177.00344848632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.414172410964966, + "rewards_train/margins": 1.78617262840271, + "rewards_train/rejected": -4.200345039367676, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -30.601093292236328, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -15.401130676269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4194843769073486, + "rewards_train/margins": -1.2418712377548218, + "rewards_train/rejected": -1.1776131391525269, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -10.100010871887207, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -54.710548400878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16499891877174377, + "rewards_train/margins": 4.954803854227066, + "rewards_train/rejected": -4.789804935455322, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -84.35890197753906, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -103.7852783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.135890245437622, + "rewards_train/margins": 1.6426377296447754, + "rewards_train/rejected": -2.7785279750823975, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -115.54658508300781, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -224.6604461669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4546585083007812, + "rewards_train/margins": 3.611386299133301, + "rewards_train/rejected": -6.066044807434082, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -126.90713500976562, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -148.79904174804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6407135128974915, + "rewards_train/margins": 1.8891906142234802, + "rewards_train/rejected": -2.5299041271209717, + "step": 2054 + }, + { + "epoch": 0.57, + "logps_train/chosen": -95.45196533203125, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -187.19642639160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.045196533203125, + "rewards_train/margins": 3.874446392059326, + "rewards_train/rejected": -5.919642925262451, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -15.733064651489258, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -33.61384582519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1904939413070679, + "rewards_train/margins": 0.1958906650543213, + "rewards_train/rejected": -1.3863846063613892, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -82.81293487548828, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -161.46636962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9312934875488281, + "rewards_train/margins": 4.715343475341797, + "rewards_train/rejected": -5.646636962890625, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -7.1907172203063965, + "logps_train/ref_chosen": -0.7109375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -20.095247268676758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6479780077934265, + "rewards_train/margins": 0.8959217667579651, + "rewards_train/rejected": -1.5438997745513916, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -132.66586303710938, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -112.96405792236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0165863037109375, + "rewards_train/margins": 0.5798194408416748, + "rewards_train/rejected": -2.5964057445526123, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -5.442637920379639, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -12.427051544189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0005137920379638672, + "rewards_train/margins": 0.6734413504600525, + "rewards_train/rejected": -0.6739551424980164, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -35.54890441894531, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -32.16426086425781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9798904657363892, + "rewards_train/margins": 0.5834106206893921, + "rewards_train/rejected": -2.5633010864257812, + "step": 2055 + }, + { + "epoch": 0.57, + "logps_train/chosen": -11.49321174621582, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -59.09914016723633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7180711627006531, + "rewards_train/margins": 3.0668428540229797, + "rewards_train/rejected": -3.784914016723633, + "step": 2055 + }, + { + "epoch": 0.57, + "learning_rate": 1.6756709174438975e-07, + "loss": 0.3023, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -40.84522247314453, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -54.725303649902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.053272247314453, + "rewards_train/margins": 0.41925811767578125, + "rewards_train/rejected": -3.4725303649902344, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -150.91372680664062, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -122.53619384765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.541372776031494, + "rewards_train/margins": -0.9877533912658691, + "rewards_train/rejected": -2.553619384765625, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -142.01380920410156, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -103.44464111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5013809204101562, + "rewards_train/margins": 0.7930831909179688, + "rewards_train/rejected": -1.294464111328125, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -214.1475830078125, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -241.33517456054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.81475830078125, + "rewards_train/margins": 0.5187592506408691, + "rewards_train/rejected": -7.333517551422119, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -9.760231018066406, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -52.45901107788086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1572730988264084, + "rewards_train/margins": 3.582378104329109, + "rewards_train/rejected": -3.7396512031555176, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -37.077449798583984, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -53.98062515258789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.06712007522583, + "rewards_train/margins": 0.38094258308410645, + "rewards_train/rejected": -3.4480626583099365, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -27.686782836914062, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -3.34375, + "logps_train/rejected": -35.6102180480957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5593032836914062, + "rewards_train/margins": 0.6673436164855957, + "rewards_train/rejected": -3.226646900177002, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -158.57154846191406, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -197.7734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.107154846191406, + "rewards_train/margins": 1.3201894760131836, + "rewards_train/rejected": -8.42734432220459, + "step": 2056 + }, + { + "epoch": 0.57, + "logps_train/chosen": -104.38243103027344, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -244.106201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.338243246078491, + "rewards_train/margins": 8.172376871109009, + "rewards_train/rejected": -11.5106201171875, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -236.56736755371094, + "logps_train/ref_chosen": -190.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -188.4031982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.656736850738525, + "rewards_train/margins": 3.583583354949951, + "rewards_train/rejected": -8.240320205688477, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -160.3492889404297, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -170.14178466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0849289894104, + "rewards_train/margins": 0.37924957275390625, + "rewards_train/rejected": -4.464178562164307, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -18.236865997314453, + "logps_train/ref_chosen": -2.71875, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -39.09340286254883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5518115758895874, + "rewards_train/margins": 1.0325287580490112, + "rewards_train/rejected": -2.5843403339385986, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -175.28634643554688, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -231.00259399414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.278634548187256, + "rewards_train/margins": 1.6216249465942383, + "rewards_train/rejected": -7.900259494781494, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -21.45056915283203, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -45.24386215209961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.53255695104599, + "rewards_train/margins": 2.9355793595314026, + "rewards_train/rejected": -3.4681363105773926, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -24.086769104003906, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -26.95721435546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9024269580841064, + "rewards_train/margins": -0.13795554637908936, + "rewards_train/rejected": -1.764471411705017, + "step": 2057 + }, + { + "epoch": 0.57, + "logps_train/chosen": -222.1096954345703, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -325.90435791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.110969543457031, + "rewards_train/margins": 8.77946662902832, + "rewards_train/rejected": -14.890436172485352, + "step": 2057 + }, + { + "epoch": 0.58, + "learning_rate": 1.6610403794971207e-07, + "loss": 0.3562, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -47.16278076171875, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -58.72947692871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0037782192230225, + "rewards_train/margins": 2.269169569015503, + "rewards_train/rejected": -4.272947788238525, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -85.74604034423828, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -168.3876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4996039867401123, + "rewards_train/margins": 3.1891658306121826, + "rewards_train/rejected": -5.688769817352295, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -168.866455078125, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -145.1042022705078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.986645698547363, + "rewards_train/margins": -1.4262254238128662, + "rewards_train/rejected": -3.560420274734497, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -189.63121032714844, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -224.35299682617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.613121032714844, + "rewards_train/margins": 3.572178840637207, + "rewards_train/rejected": -10.18529987335205, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -14.376198768615723, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -21.135692596435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6063699126243591, + "rewards_train/margins": 0.8790743947029114, + "rewards_train/rejected": -1.4854443073272705, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -88.43928527832031, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -88.09553527832031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8939285278320312, + "rewards_train/margins": -0.034375011920928955, + "rewards_train/rejected": -0.8595535159111023, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -12.538328170776367, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -45.539222717285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9413328170776367, + "rewards_train/margins": 1.687589406967163, + "rewards_train/rejected": -2.6289222240448, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -187.35076904296875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -220.37864685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.835076808929443, + "rewards_train/margins": 1.0027880668640137, + "rewards_train/rejected": -6.837864875793457, + "step": 2058 + }, + { + "epoch": 0.58, + "logps_train/chosen": -8.255146026611328, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -17.76495361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38801461458206177, + "rewards_train/margins": 1.0041057467460632, + "rewards_train/rejected": -1.392120361328125, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.628185272216797, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -5.7091779708862305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6690685153007507, + "rewards_train/margins": -0.3606507182121277, + "rewards_train/rejected": -0.30841779708862305, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -2.9127798080444336, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -25.712791442871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13502798974514008, + "rewards_train/margins": 1.6237511783838272, + "rewards_train/rejected": -1.7587791681289673, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -103.4884033203125, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -203.60494995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9488403797149658, + "rewards_train/margins": 6.061654806137085, + "rewards_train/rejected": -8.01049518585205, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -90.43959045410156, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -47.038761138916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05604095384478569, + "rewards_train/margins": 2.859917115420103, + "rewards_train/rejected": -2.8038761615753174, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -202.97891235351562, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -240.54954528808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.397891521453857, + "rewards_train/margins": 3.857062816619873, + "rewards_train/rejected": -9.25495433807373, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -86.49693298339844, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -198.03109741210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3496932983398438, + "rewards_train/margins": 6.753417015075684, + "rewards_train/rejected": -8.103110313415527, + "step": 2059 + }, + { + "epoch": 0.58, + "logps_train/chosen": -118.02936553955078, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -127.80288696289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1029365062713623, + "rewards_train/margins": 1.3773524761199951, + "rewards_train/rejected": -4.480288982391357, + "step": 2059 + }, + { + "epoch": 0.58, + "learning_rate": 1.6464682054263767e-07, + "loss": 0.3147, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -196.0712432861328, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -201.26229858398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.007124423980713, + "rewards_train/margins": 3.069105625152588, + "rewards_train/rejected": -9.0762300491333, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -14.17955207824707, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -2.453125, + "logps_train/rejected": -41.38256072998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.324205219745636, + "rewards_train/margins": 3.5687384009361267, + "rewards_train/rejected": -3.8929436206817627, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -92.97357177734375, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -49.0, + "logps_train/rejected": -80.79339599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.747357130050659, + "rewards_train/margins": 0.43198251724243164, + "rewards_train/rejected": -3.179339647293091, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -32.73832702636719, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -5.4375, + "logps_train/rejected": -30.288188934326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5738327503204346, + "rewards_train/margins": 0.9112362861633301, + "rewards_train/rejected": -2.4850690364837646, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -32.684261322021484, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -28.633947372436523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9809261560440063, + "rewards_train/margins": 1.1137186288833618, + "rewards_train/rejected": -2.094644784927368, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -270.3290710449219, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -203.2425537109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.73290729522705, + "rewards_train/margins": -1.0086517333984375, + "rewards_train/rejected": -7.724255561828613, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -111.08070373535156, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -44.79145431518555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.758070468902588, + "rewards_train/margins": -1.441425085067749, + "rewards_train/rejected": -2.316645383834839, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -162.06590270996094, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -199.97528076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.456590414047241, + "rewards_train/margins": 4.590937852859497, + "rewards_train/rejected": -8.047528266906738, + "step": 2060 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.741943359375, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -51.68132781982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.102319359779358, + "rewards_train/margins": 3.1533135175704956, + "rewards_train/rejected": -4.2556328773498535, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -138.72314453125, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -92.43932342529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.722314476966858, + "rewards_train/margins": 2.121617913246155, + "rewards_train/rejected": -3.8439323902130127, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -148.2425537109375, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -321.9429931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7742555141448975, + "rewards_train/margins": 10.02004361152649, + "rewards_train/rejected": -13.794299125671387, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -20.944856643676758, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -31.854429244995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4663606882095337, + "rewards_train/margins": 0.9003323316574097, + "rewards_train/rejected": -2.3666930198669434, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -143.34414672851562, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -279.8784484863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.734414577484131, + "rewards_train/margins": 8.153430461883545, + "rewards_train/rejected": -12.887845039367676, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -8.629623413085938, + "logps_train/ref_chosen": -1.84375, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -31.830913543701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6785873770713806, + "rewards_train/margins": 0.20450401306152344, + "rewards_train/rejected": -0.883091390132904, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -153.8502197265625, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -138.29327392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6850221157073975, + "rewards_train/margins": 1.2943055629730225, + "rewards_train/rejected": -4.97932767868042, + "step": 2061 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.6929567456245422, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -1.3984375, + "logps_train/rejected": -2.23384952545166, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.029141826555132866, + "rewards_train/margins": 0.11268303357064724, + "rewards_train/rejected": -0.08354120701551437, + "step": 2061 + }, + { + "epoch": 0.58, + "learning_rate": 1.6319544972214428e-07, + "loss": 0.3838, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.20600582659244537, + "logps_train/ref_chosen": -0.408203125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -9.98820972442627, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.020219730213284492, + "rewards_train/margins": 0.06904070265591145, + "rewards_train/rejected": -0.04882097244262695, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -9.423591613769531, + "logps_train/ref_chosen": -1.2734375, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -31.706409454345703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.815015435218811, + "rewards_train/margins": -0.4818744957447052, + "rewards_train/rejected": -0.33314093947410583, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -126.228271484375, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -132.80899047851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17282715439796448, + "rewards_train/margins": 0.20807188749313354, + "rewards_train/rejected": -0.380899041891098, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -17.479034423828125, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -96.33941650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8729034662246704, + "rewards_train/margins": 2.4110382795333862, + "rewards_train/rejected": -3.2839417457580566, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -10.05936336517334, + "logps_train/ref_chosen": -1.5703125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -29.247821807861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.848905086517334, + "rewards_train/margins": 0.21337711811065674, + "rewards_train/rejected": -1.0622822046279907, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -181.46774291992188, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -249.0430908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.296774387359619, + "rewards_train/margins": 4.207535266876221, + "rewards_train/rejected": -11.50430965423584, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -138.19969177246094, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -183.96035766601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2199692726135254, + "rewards_train/margins": 5.1260666847229, + "rewards_train/rejected": -7.346035957336426, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -21.514877319335938, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -18.700984954833984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7749252319335938, + "rewards_train/margins": -0.7673267126083374, + "rewards_train/rejected": -1.0075985193252563, + "step": 2062 + }, + { + "epoch": 0.58, + "logps_train/chosen": -53.38523864746094, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -121.05354309082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5385238528251648, + "rewards_train/margins": 3.8668306469917297, + "rewards_train/rejected": -4.4053544998168945, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -110.29264831542969, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -128.81101989746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8792648315429688, + "rewards_train/margins": 0.4018371105194092, + "rewards_train/rejected": -2.281101942062378, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -18.890207290649414, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -62.26936340332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6499582529067993, + "rewards_train/margins": 1.5519782304763794, + "rewards_train/rejected": -3.2019364833831787, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -20.16619300842285, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -31.114120483398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.535369336605072, + "rewards_train/margins": 1.8260427117347717, + "rewards_train/rejected": -2.3614120483398438, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -112.68911743164062, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -136.61798095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1689116954803467, + "rewards_train/margins": 1.8928864002227783, + "rewards_train/rejected": -4.061798095703125, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -31.57876968383789, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -40.45636749267578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.507876992225647, + "rewards_train/margins": 1.6252597570419312, + "rewards_train/rejected": -3.133136749267578, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -114.51387023925781, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -149.15174865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4513871669769287, + "rewards_train/margins": 0.413787841796875, + "rewards_train/rejected": -2.8651750087738037, + "step": 2063 + }, + { + "epoch": 0.58, + "logps_train/chosen": -84.39286804199219, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -129.640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6392868757247925, + "rewards_train/margins": 1.9747756719589233, + "rewards_train/rejected": -3.614062547683716, + "step": 2063 + }, + { + "epoch": 0.58, + "learning_rate": 1.6174993564628813e-07, + "loss": 0.3684, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -108.11132049560547, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -237.08807373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.161132335662842, + "rewards_train/margins": 6.897675037384033, + "rewards_train/rejected": -11.058807373046875, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -14.92313289642334, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -38.59931945800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.023563290014863014, + "rewards_train/margins": 3.01761875115335, + "rewards_train/rejected": -3.041182041168213, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -13.257037162780762, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -18.131147384643555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8194537162780762, + "rewards_train/margins": 0.06866103410720825, + "rewards_train/rejected": -0.8881147503852844, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -160.41648864746094, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -214.1382293701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.341648817062378, + "rewards_train/margins": 4.572174310684204, + "rewards_train/rejected": -7.913823127746582, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -176.31285095214844, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -207.92405700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.481285095214844, + "rewards_train/margins": 2.36112117767334, + "rewards_train/rejected": -8.842406272888184, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -103.25074768066406, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -252.01077270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0750749111175537, + "rewards_train/margins": 9.126002550125122, + "rewards_train/rejected": -11.201077461242676, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -153.096435546875, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -126.15415954589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.709643840789795, + "rewards_train/margins": -1.0942277908325195, + "rewards_train/rejected": -4.615416049957275, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -157.5467529296875, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -142.176025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.054675579071045, + "rewards_train/margins": 0.7629270553588867, + "rewards_train/rejected": -5.817602634429932, + "step": 2064 + }, + { + "epoch": 0.58, + "logps_train/chosen": -9.725797653198242, + "logps_train/ref_chosen": -1.1875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -21.115360260009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8538298010826111, + "rewards_train/margins": 0.032706260681152344, + "rewards_train/rejected": -0.8865360617637634, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -105.68814086914062, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -140.03628540039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7188141345977783, + "rewards_train/margins": 0.7348144054412842, + "rewards_train/rejected": -2.4536285400390625, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -157.67837524414062, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -146.3546142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0178375244140625, + "rewards_train/margins": 2.417623996734619, + "rewards_train/rejected": -6.435461521148682, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -84.72441101074219, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -153.80902099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9724411368370056, + "rewards_train/margins": 4.708460867404938, + "rewards_train/rejected": -5.680902004241943, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -167.97055053710938, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -186.20770263671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.897055149078369, + "rewards_train/margins": -0.3762848377227783, + "rewards_train/rejected": -3.520770311355591, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -237.96743774414062, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -307.1440734863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.896743774414062, + "rewards_train/margins": 3.6176633834838867, + "rewards_train/rejected": -15.51440715789795, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -80.96307373046875, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -201.68124389648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.746307373046875, + "rewards_train/margins": 6.621817111968994, + "rewards_train/rejected": -7.368124485015869, + "step": 2065 + }, + { + "epoch": 0.58, + "logps_train/chosen": -22.907678604125977, + "logps_train/ref_chosen": -3.046875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -59.13764953613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9860804080963135, + "rewards_train/margins": 2.4714348316192627, + "rewards_train/rejected": -4.457515239715576, + "step": 2065 + }, + { + "epoch": 0.58, + "learning_rate": 1.603102884321358e-07, + "loss": 0.2966, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -88.71601867675781, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -192.53433227539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0716018676757812, + "rewards_train/margins": 6.831831932067871, + "rewards_train/rejected": -8.903433799743652, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -115.3970718383789, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -175.87046813964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.189707279205322, + "rewards_train/margins": 4.147339344024658, + "rewards_train/rejected": -8.33704662322998, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -42.078887939453125, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -58.9066162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4735138416290283, + "rewards_train/margins": 0.8233978748321533, + "rewards_train/rejected": -4.296911716461182, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -53.614952087402344, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -64.18126678466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.498995304107666, + "rewards_train/margins": 0.8878812789916992, + "rewards_train/rejected": -5.386876583099365, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -15.444971084594727, + "logps_train/ref_chosen": -1.140625, + "logps_train/ref_rejected": -1.859375, + "logps_train/rejected": -22.580482482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4304345846176147, + "rewards_train/margins": 0.6416763067245483, + "rewards_train/rejected": -2.072110891342163, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -125.20018005371094, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -184.22930908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.920017957687378, + "rewards_train/margins": 4.252913236618042, + "rewards_train/rejected": -7.17293119430542, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -97.91650390625, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -69.7430648803711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.941650390625, + "rewards_train/margins": 3.6951560974121094, + "rewards_train/rejected": -4.636806488037109, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -59.11561584472656, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -31.140522003173828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.542811870574951, + "rewards_train/margins": -2.044384717941284, + "rewards_train/rejected": -2.498427152633667, + "step": 2066 + }, + { + "epoch": 0.58, + "logps_train/chosen": -118.9079360961914, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -175.65960693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7907936573028564, + "rewards_train/margins": 1.8751671314239502, + "rewards_train/rejected": -5.665960788726807, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -293.50372314453125, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -296.937255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.750372886657715, + "rewards_train/margins": 1.7433528900146484, + "rewards_train/rejected": -13.493725776672363, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -28.169780731201172, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -42.734886169433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14197807013988495, + "rewards_train/margins": 3.225260689854622, + "rewards_train/rejected": -3.367238759994507, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -140.279296875, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -170.05613708496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8779296875, + "rewards_train/margins": 2.827683925628662, + "rewards_train/rejected": -4.705613613128662, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -170.77145385742188, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -221.30810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7771453857421875, + "rewards_train/margins": 5.653665542602539, + "rewards_train/rejected": -8.430810928344727, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -84.22157287597656, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -86.93814086914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2971572875976562, + "rewards_train/margins": 0.5966567993164062, + "rewards_train/rejected": -3.8938140869140625, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -133.2982177734375, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -175.04269409179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.729821681976318, + "rewards_train/margins": 0.8244476318359375, + "rewards_train/rejected": -6.554269313812256, + "step": 2067 + }, + { + "epoch": 0.58, + "logps_train/chosen": -134.50869750976562, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -96.24051666259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.2508697509765625, + "rewards_train/margins": -3.4268181324005127, + "rewards_train/rejected": -3.82405161857605, + "step": 2067 + }, + { + "epoch": 0.58, + "learning_rate": 1.588765181556907e-07, + "loss": 0.5012, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -151.3021240234375, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -157.74363708496094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2302124500274658, + "rewards_train/margins": -0.05584871768951416, + "rewards_train/rejected": -1.1743637323379517, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -32.41620635986328, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -61.406185150146484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9103707075119019, + "rewards_train/margins": -0.44475221633911133, + "rewards_train/rejected": -1.4656184911727905, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -123.21778106689453, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -173.8080291748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4217782020568848, + "rewards_train/margins": 2.5590248107910156, + "rewards_train/rejected": -5.9808030128479, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -70.9437484741211, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -172.85791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.544374942779541, + "rewards_train/margins": 5.641416072845459, + "rewards_train/rejected": -8.185791015625, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.5247474908828735, + "logps_train/ref_chosen": -0.5234375, + "logps_train/ref_rejected": -1.9765625, + "logps_train/rejected": -1.368395447731018, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.00013099909119773656, + "rewards_train/margins": -0.06094770431809593, + "rewards_train/rejected": 0.06081670522689819, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -35.71445083618164, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -108.78997039794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003554916474968195, + "rewards_train/margins": 3.5325520993210375, + "rewards_train/rejected": -3.5289971828460693, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -183.86367797851562, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -229.9544677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5863678455352783, + "rewards_train/margins": 6.2090795040130615, + "rewards_train/rejected": -8.79544734954834, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -245.51602172851562, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -242.898193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.551602363586426, + "rewards_train/margins": 1.1382169723510742, + "rewards_train/rejected": -9.6898193359375, + "step": 2068 + }, + { + "epoch": 0.58, + "logps_train/chosen": -81.61968994140625, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -107.58728790283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4119690656661987, + "rewards_train/margins": 2.7467600107192993, + "rewards_train/rejected": -4.158729076385498, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -31.356494903564453, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -68.16212463378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0106494426727295, + "rewards_train/margins": 1.7805631160736084, + "rewards_train/rejected": -3.791212558746338, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -143.1922607421875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -160.14649963378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.419226050376892, + "rewards_train/margins": 3.3454238176345825, + "rewards_train/rejected": -4.764649868011475, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -154.624755859375, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -179.04766845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.512475490570068, + "rewards_train/margins": 2.0422916412353516, + "rewards_train/rejected": -6.55476713180542, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -22.371097564697266, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -28.692028045654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0183597803115845, + "rewards_train/margins": 0.5570930242538452, + "rewards_train/rejected": -1.5754528045654297, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -90.3803482055664, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -181.50051879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2380348443984985, + "rewards_train/margins": 3.312017321586609, + "rewards_train/rejected": -4.550052165985107, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -193.35357666015625, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -255.1305389404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2353577613830566, + "rewards_train/margins": 5.077696323394775, + "rewards_train/rejected": -7.313054084777832, + "step": 2069 + }, + { + "epoch": 0.58, + "logps_train/chosen": -105.63936614990234, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -258.70977783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.913936614990234, + "rewards_train/margins": 7.357041358947754, + "rewards_train/rejected": -12.270977973937988, + "step": 2069 + }, + { + "epoch": 0.58, + "learning_rate": 1.5744863485182535e-07, + "loss": 0.2279, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -7.097100257873535, + "logps_train/ref_chosen": -1.5859375, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -47.84295654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5511162877082825, + "rewards_train/margins": 2.5581793189048767, + "rewards_train/rejected": -3.109295606613159, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -105.96324157714844, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -104.18696594238281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6963242292404175, + "rewards_train/margins": -1.3776276409626007, + "rewards_train/rejected": -0.3186965882778168, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.06587257981300354, + "logps_train/ref_chosen": -0.34375, + "logps_train/ref_rejected": -2.078125, + "logps_train/rejected": -4.041618347167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.027787743136286736, + "rewards_train/margins": 0.22413708083331585, + "rewards_train/rejected": -0.1963493376970291, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -100.13194274902344, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -116.538818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9631942510604858, + "rewards_train/margins": 0.5406876802444458, + "rewards_train/rejected": -2.5038819313049316, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -163.18533325195312, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -209.79905700683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4185333251953125, + "rewards_train/margins": 4.811372756958008, + "rewards_train/rejected": -8.22990608215332, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -10.260624885559082, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -21.255352020263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08231248706579208, + "rewards_train/margins": 0.9369727149605751, + "rewards_train/rejected": -1.0192852020263672, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -18.334012985229492, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -20.221324920654297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6459013223648071, + "rewards_train/margins": 0.08248120546340942, + "rewards_train/rejected": -0.7283825278282166, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -1.4058890342712402, + "logps_train/ref_chosen": -0.78515625, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -7.039958953857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06207327917218208, + "rewards_train/margins": 0.3481726162135601, + "rewards_train/rejected": -0.4102458953857422, + "step": 2070 + }, + { + "epoch": 0.58, + "logps_train/chosen": -112.70760345458984, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -116.64590454101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.570760488510132, + "rewards_train/margins": 1.593829870223999, + "rewards_train/rejected": -4.164590358734131, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.863996505737305, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -53.168914794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9051496386528015, + "rewards_train/margins": 0.8117418885231018, + "rewards_train/rejected": -1.7168915271759033, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -23.113853454589844, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -72.44480895996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5770103931427002, + "rewards_train/margins": 3.154970407485962, + "rewards_train/rejected": -4.731980800628662, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -61.06309127807617, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -188.43362426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7563091516494751, + "rewards_train/margins": 5.587053179740906, + "rewards_train/rejected": -6.343362331390381, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -132.21649169921875, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -143.7047119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.471649169921875, + "rewards_train/margins": 0.5488221645355225, + "rewards_train/rejected": -3.0204713344573975, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -30.775787353515625, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -45.109657287597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.30570387840271, + "rewards_train/margins": 0.9552619457244873, + "rewards_train/rejected": -3.2609658241271973, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -124.07017517089844, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -100.86027526855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.657017469406128, + "rewards_train/margins": 0.6290102005004883, + "rewards_train/rejected": -3.286027669906616, + "step": 2071 + }, + { + "epoch": 0.58, + "logps_train/chosen": -209.11892700195312, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -245.5998992919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.211892604827881, + "rewards_train/margins": 1.5480976104736328, + "rewards_train/rejected": -7.759990215301514, + "step": 2071 + }, + { + "epoch": 0.58, + "learning_rate": 1.5602664851420855e-07, + "loss": 0.3907, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -237.5518798828125, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -211.339599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.75518798828125, + "rewards_train/margins": 2.1287717819213867, + "rewards_train/rejected": -8.883959770202637, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -188.12103271484375, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -66.941650390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.712103366851807, + "rewards_train/margins": -2.6929383277893066, + "rewards_train/rejected": -2.0191650390625, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -15.779742240905762, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -21.13318634033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8060992360115051, + "rewards_train/margins": 0.169719398021698, + "rewards_train/rejected": -0.9758186340332031, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -104.46369934082031, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -176.68707275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24636994302272797, + "rewards_train/margins": 5.372337237000465, + "rewards_train/rejected": -5.618707180023193, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -252.59063720703125, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -134.09454345703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.359064102172852, + "rewards_train/margins": -3.549609661102295, + "rewards_train/rejected": -5.809454441070557, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -15.321459770202637, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -15.337413787841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2071460485458374, + "rewards_train/margins": 0.0015953779220581055, + "rewards_train/rejected": -1.2087414264678955, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -41.794189453125, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -78.7452392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6669189929962158, + "rewards_train/margins": 3.907604932785034, + "rewards_train/rejected": -5.57452392578125, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -15.843226432800293, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -34.51327896118164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5343226790428162, + "rewards_train/margins": 2.304505169391632, + "rewards_train/rejected": -2.8388278484344482, + "step": 2072 + }, + { + "epoch": 0.58, + "logps_train/chosen": -11.376853942871094, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -38.24736785888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6408104300498962, + "rewards_train/margins": 2.4808014035224915, + "rewards_train/rejected": -3.1216118335723877, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -9.858952522277832, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -29.155221939086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6093327403068542, + "rewards_train/margins": 1.0686895251274109, + "rewards_train/rejected": -1.6780222654342651, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -25.247499465942383, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -30.19522476196289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2872499227523804, + "rewards_train/margins": -0.2677273750305176, + "rewards_train/rejected": -1.0195225477218628, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -13.187192916870117, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -22.983863830566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45621928572654724, + "rewards_train/margins": 0.8171670734882355, + "rewards_train/rejected": -1.2733863592147827, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -160.62228393554688, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -204.01669311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5622284412384033, + "rewards_train/margins": 3.539440870285034, + "rewards_train/rejected": -6.1016693115234375, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -3.3695521354675293, + "logps_train/ref_chosen": -1.6171875, + "logps_train/ref_rejected": -0.91796875, + "logps_train/rejected": -5.335315704345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17523646354675293, + "rewards_train/margins": 0.26649823784828186, + "rewards_train/rejected": -0.4417347013950348, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -239.402099609375, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -256.5117492675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.640210151672363, + "rewards_train/margins": 0.010965347290039062, + "rewards_train/rejected": -11.651175498962402, + "step": 2073 + }, + { + "epoch": 0.58, + "logps_train/chosen": -124.15455627441406, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -43.695350646972656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.515455722808838, + "rewards_train/margins": -1.5834206342697144, + "rewards_train/rejected": -1.9320350885391235, + "step": 2073 + }, + { + "epoch": 0.58, + "learning_rate": 1.5461056909523695e-07, + "loss": 0.7837, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -15.979917526245117, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -44.9349250793457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16450825333595276, + "rewards_train/margins": 3.7830008566379547, + "rewards_train/rejected": -3.618492603302002, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -17.643842697143555, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -52.75, + "logps_train/rejected": -76.37210083007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6956343054771423, + "rewards_train/margins": 1.6665757298469543, + "rewards_train/rejected": -2.3622100353240967, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -9.229924201965332, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -30.718048095703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5964299440383911, + "rewards_train/margins": 1.637874960899353, + "rewards_train/rejected": -2.234304904937744, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -104.14710998535156, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -131.06768798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.364711046218872, + "rewards_train/margins": 3.242058038711548, + "rewards_train/rejected": -6.60676908493042, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.685941696166992, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -10.082572937011719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.21890583634376526, + "rewards_train/margins": 0.10841313004493713, + "rewards_train/rejected": 0.11049270629882812, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -109.75494384765625, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -231.79257202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5754945278167725, + "rewards_train/margins": 8.603762865066528, + "rewards_train/rejected": -12.1792573928833, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -97.72517395019531, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -164.052978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9725174307823181, + "rewards_train/margins": 5.382780611515045, + "rewards_train/rejected": -6.355298042297363, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -30.154375076293945, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -33.52470016479492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8654375076293945, + "rewards_train/margins": 0.9620325565338135, + "rewards_train/rejected": -1.827470064163208, + "step": 2074 + }, + { + "epoch": 0.58, + "logps_train/chosen": -23.91895866394043, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -68.54341125488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6918959021568298, + "rewards_train/margins": 3.4374452233314514, + "rewards_train/rejected": -4.129341125488281, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -33.207340240478516, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -54.73405838012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1082340478897095, + "rewards_train/margins": 1.3276718854904175, + "rewards_train/rejected": -2.435905933380127, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -62.39031982421875, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -72.4526138305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1140320301055908, + "rewards_train/margins": 0.3812294006347656, + "rewards_train/rejected": -1.4952614307403564, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -161.84608459472656, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -182.72080993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7846086025238037, + "rewards_train/margins": 1.1874723434448242, + "rewards_train/rejected": -3.972080945968628, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -125.81608581542969, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -163.42190551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7816085815429688, + "rewards_train/margins": 2.560581922531128, + "rewards_train/rejected": -3.3421905040740967, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -8.167274475097656, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -20.334064483642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07702255249023438, + "rewards_train/margins": 0.46042901277542114, + "rewards_train/rejected": -0.38340646028518677, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -12.50910472869873, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -0.451171875, + "logps_train/rejected": -1.7310396432876587, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2509104907512665, + "rewards_train/margins": -0.12292371690273285, + "rewards_train/rejected": -0.12798677384853363, + "step": 2075 + }, + { + "epoch": 0.58, + "logps_train/chosen": -27.35952377319336, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -31.337303161621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2172024250030518, + "rewards_train/margins": 0.6227779388427734, + "rewards_train/rejected": -2.839980363845825, + "step": 2075 + }, + { + "epoch": 0.58, + "learning_rate": 1.5320040650596522e-07, + "loss": 0.2614, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -20.407499313354492, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -14.125, + "logps_train/rejected": -25.590177536010742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1467500776052475, + "rewards_train/margins": 1.2932678312063217, + "rewards_train/rejected": -1.1465177536010742, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -10.12125015258789, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -17.89694595336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025374984368681908, + "rewards_train/margins": 0.06506958045065403, + "rewards_train/rejected": -0.03969459608197212, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -75.78231811523438, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -86.43183898925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9782318472862244, + "rewards_train/margins": 0.3149520754814148, + "rewards_train/rejected": -1.2931839227676392, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -98.15875244140625, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -165.51405334472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1658753156661987, + "rewards_train/margins": 6.635530114173889, + "rewards_train/rejected": -7.801405429840088, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -129.16775512695312, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -239.05650329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1167755126953125, + "rewards_train/margins": 6.488875389099121, + "rewards_train/rejected": -8.605650901794434, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -118.57574462890625, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -204.60012817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.007574439048767, + "rewards_train/margins": 4.152438282966614, + "rewards_train/rejected": -5.160012722015381, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -175.7635955810547, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -176.76446533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.576359510421753, + "rewards_train/margins": 2.1000869274139404, + "rewards_train/rejected": -5.676446437835693, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -24.881446838378906, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -37.94353485107422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0631446838378906, + "rewards_train/margins": -0.6812912225723267, + "rewards_train/rejected": -1.381853461265564, + "step": 2076 + }, + { + "epoch": 0.58, + "logps_train/chosen": -191.86801147460938, + "logps_train/ref_chosen": -178.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -209.10992431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3868011236190796, + "rewards_train/margins": 4.824191212654114, + "rewards_train/rejected": -6.210992336273193, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -73.72935485839844, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -82.96759033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1479355096817017, + "rewards_train/margins": 1.5988236665725708, + "rewards_train/rejected": -2.7467591762542725, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -188.81744384765625, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -220.77793884277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4817445278167725, + "rewards_train/margins": 2.696049451828003, + "rewards_train/rejected": -6.177793979644775, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -26.027666091918945, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -14.420948028564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4840166568756104, + "rewards_train/margins": -0.7731718420982361, + "rewards_train/rejected": -0.7108448147773743, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -93.09779357910156, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -98.33549499511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6902206540107727, + "rewards_train/margins": 1.0237701535224915, + "rewards_train/rejected": -0.33354949951171875, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -155.59005737304688, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -48.08462142944336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.109005928039551, + "rewards_train/margins": -1.1130437850952148, + "rewards_train/rejected": -2.995962142944336, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -12.98554801940918, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -33.53141784667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.757929801940918, + "rewards_train/margins": 0.2702120542526245, + "rewards_train/rejected": -1.0281418561935425, + "step": 2077 + }, + { + "epoch": 0.58, + "logps_train/chosen": -25.86646270751953, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -7.09375, + "logps_train/rejected": -35.619972229003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0210213661193848, + "rewards_train/margins": 0.8316009044647217, + "rewards_train/rejected": -2.8526222705841064, + "step": 2077 + }, + { + "epoch": 0.58, + "learning_rate": 1.5179617061603655e-07, + "loss": 0.4194, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -9.044309616088867, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -68.69062805175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3981809616088867, + "rewards_train/margins": 2.7708818912506104, + "rewards_train/rejected": -3.169062852859497, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -35.866943359375, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -46.683937072753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1679444313049316, + "rewards_train/margins": 1.0129494667053223, + "rewards_train/rejected": -4.180893898010254, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -56.94548797607422, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -96.0533447265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.494548797607422, + "rewards_train/margins": 2.7607855796813965, + "rewards_train/rejected": -5.255334377288818, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -45.739688873291016, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -17.765466690063477, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1364688873291016, + "rewards_train/margins": -0.6067972183227539, + "rewards_train/rejected": -1.5296716690063477, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -146.73324584960938, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -146.27935791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7233245372772217, + "rewards_train/margins": 0.7546112537384033, + "rewards_train/rejected": -3.477935791015625, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -54.78838348388672, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -52.640892028808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4663383960723877, + "rewards_train/margins": 1.1977508068084717, + "rewards_train/rejected": -4.664089202880859, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -19.059383392333984, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -59.25, + "logps_train/rejected": -66.0892105102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31218835711479187, + "rewards_train/margins": 0.3717326819896698, + "rewards_train/rejected": -0.6839210391044617, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -69.8369140625, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -60.75551986694336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4336914122104645, + "rewards_train/margins": 0.4168606102466583, + "rewards_train/rejected": -0.8505520224571228, + "step": 2078 + }, + { + "epoch": 0.58, + "logps_train/chosen": -25.29769515991211, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -18.29016876220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4360195398330688, + "rewards_train/margins": 0.07737231254577637, + "rewards_train/rejected": -1.5133918523788452, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -127.61394500732422, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -246.2523193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1113946437835693, + "rewards_train/margins": 8.71383786201477, + "rewards_train/rejected": -10.82523250579834, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -11.943878173828125, + "logps_train/ref_chosen": -0.62890625, + "logps_train/ref_rejected": -2.90625, + "logps_train/rejected": -14.06828498840332, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1314972639083862, + "rewards_train/margins": -0.015293717384338379, + "rewards_train/rejected": -1.1162035465240479, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -13.145224571228027, + "logps_train/ref_chosen": -2.71875, + "logps_train/ref_rejected": -3.09375, + "logps_train/rejected": -19.19415283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0426474809646606, + "rewards_train/margins": 0.5673928260803223, + "rewards_train/rejected": -1.610040307044983, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -231.89724731445312, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -211.787841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.08972454071045, + "rewards_train/margins": 1.7890596389770508, + "rewards_train/rejected": -10.8787841796875, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -191.74176025390625, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -293.1111755371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.974175930023193, + "rewards_train/margins": 10.03694200515747, + "rewards_train/rejected": -15.011117935180664, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -221.70188903808594, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -231.34152221679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.070188999176025, + "rewards_train/margins": 4.163963794708252, + "rewards_train/rejected": -8.234152793884277, + "step": 2079 + }, + { + "epoch": 0.58, + "logps_train/chosen": -30.259597778320312, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -41.44971466064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4509598016738892, + "rewards_train/margins": 1.8971368074417114, + "rewards_train/rejected": -3.3480966091156006, + "step": 2079 + }, + { + "epoch": 0.58, + "learning_rate": 1.5039787125361326e-07, + "loss": 0.3293, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -25.962316513061523, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -9.471325874328613, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1149816513061523, + "rewards_train/margins": -0.7678490579128265, + "rewards_train/rejected": -0.3471325933933258, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -124.06312561035156, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -226.71273803710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.006312608718872, + "rewards_train/margins": 6.564961194992065, + "rewards_train/rejected": -7.5712738037109375, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -197.6531982421875, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -180.53306579589844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.065320014953613, + "rewards_train/margins": -1.5120134353637695, + "rewards_train/rejected": -7.553306579589844, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -168.95994567871094, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -211.5801239013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.795994520187378, + "rewards_train/margins": 2.5620181560516357, + "rewards_train/rejected": -6.358012676239014, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -31.601184844970703, + "logps_train/ref_chosen": -23.875, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -37.472530364990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7726184725761414, + "rewards_train/margins": 0.8246346116065979, + "rewards_train/rejected": -1.5972530841827393, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -34.442039489746094, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -35.64838790893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.706704020500183, + "rewards_train/margins": 0.7518848180770874, + "rewards_train/rejected": -2.4585888385772705, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -216.33908081054688, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -246.40538024902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.783907890319824, + "rewards_train/margins": 4.206630706787109, + "rewards_train/rejected": -13.990538597106934, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -136.84620666503906, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -210.89064025878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1346206665039062, + "rewards_train/margins": 7.604443550109863, + "rewards_train/rejected": -9.73906421661377, + "step": 2080 + }, + { + "epoch": 0.58, + "logps_train/chosen": -39.274749755859375, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -167.07168579101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7399749755859375, + "rewards_train/margins": 4.567193508148193, + "rewards_train/rejected": -6.307168483734131, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -89.12979125976562, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -151.84652709960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7629791498184204, + "rewards_train/margins": 4.1216734647750854, + "rewards_train/rejected": -5.884652614593506, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -66.3177490234375, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -121.63460540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9317749738693237, + "rewards_train/margins": 3.281685471534729, + "rewards_train/rejected": -5.213460445404053, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.006163014564663172, + "logps_train/ref_chosen": -0.193359375, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -8.382363319396973, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.018719635903835297, + "rewards_train/margins": 0.4225809797644615, + "rewards_train/rejected": -0.4038613438606262, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -63.79085922241211, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -59.899810791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6540859341621399, + "rewards_train/margins": 4.054645240306854, + "rewards_train/rejected": -4.708731174468994, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -153.7661895751953, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -181.862548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.676619052886963, + "rewards_train/margins": 5.309636116027832, + "rewards_train/rejected": -7.986255168914795, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -23.020423889160156, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -44.48291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3207924365997314, + "rewards_train/margins": 2.6556236743927, + "rewards_train/rejected": -3.9764161109924316, + "step": 2081 + }, + { + "epoch": 0.58, + "logps_train/chosen": -179.8303985595703, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -259.6528625488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5830399990081787, + "rewards_train/margins": 5.882246255874634, + "rewards_train/rejected": -9.465286254882812, + "step": 2081 + }, + { + "epoch": 0.58, + "learning_rate": 1.4900551820530827e-07, + "loss": 0.2726, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -64.65232849121094, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -176.60816955566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4152328968048096, + "rewards_train/margins": 5.595584154129028, + "rewards_train/rejected": -7.010817050933838, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -23.531288146972656, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -18.969242095947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4593788385391235, + "rewards_train/margins": -0.4562046527862549, + "rewards_train/rejected": -1.0031741857528687, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -40.17802810668945, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -2.609375, + "logps_train/rejected": -37.3834114074707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1553027629852295, + "rewards_train/margins": 1.3221008777618408, + "rewards_train/rejected": -3.4774036407470703, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -28.719528198242188, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -30.7475643157959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1469528675079346, + "rewards_train/margins": 1.468428611755371, + "rewards_train/rejected": -2.6153814792633057, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -88.88265228271484, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -251.192626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9382652044296265, + "rewards_train/margins": 11.38099730014801, + "rewards_train/rejected": -13.319262504577637, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -27.642913818359375, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -25.82723617553711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1611664295196533, + "rewards_train/margins": -0.19094276428222656, + "rewards_train/rejected": -1.9702236652374268, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -125.14978790283203, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -225.26986694335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7649788856506348, + "rewards_train/margins": 4.362008094787598, + "rewards_train/rejected": -7.126986980438232, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -98.23234558105469, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -186.8622589111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6232346296310425, + "rewards_train/margins": 4.96299135684967, + "rewards_train/rejected": -6.586225986480713, + "step": 2082 + }, + { + "epoch": 0.58, + "logps_train/chosen": -112.97991943359375, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -184.35406494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.147991895675659, + "rewards_train/margins": 5.487414598464966, + "rewards_train/rejected": -8.635406494140625, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -149.92633056640625, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -221.07318115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.207366943359375, + "rewards_train/margins": 7.714684963226318, + "rewards_train/rejected": -7.507318019866943, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -41.60003662109375, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -191.48626708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9662537574768066, + "rewards_train/margins": 3.932373046875, + "rewards_train/rejected": -6.898626804351807, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -201.8026123046875, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -231.95941162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.08026123046875, + "rewards_train/margins": 2.5156803131103516, + "rewards_train/rejected": -8.595941543579102, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -144.60012817382812, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -186.9494171142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7600128650665283, + "rewards_train/margins": 1.6349289417266846, + "rewards_train/rejected": -5.394941806793213, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -13.46666145324707, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -40.34539794921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7591661810874939, + "rewards_train/margins": 2.6659987568855286, + "rewards_train/rejected": -3.4251649379730225, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.09141972661018372, + "logps_train/ref_chosen": -0.126953125, + "logps_train/ref_rejected": -0.126953125, + "logps_train/rejected": -0.08804340660572052, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.0035533399786800146, + "rewards_train/margins": -0.0003376319073140621, + "rewards_train/rejected": 0.0038909718859940767, + "step": 2083 + }, + { + "epoch": 0.58, + "logps_train/chosen": -144.71710205078125, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -147.71498107910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3217103481292725, + "rewards_train/margins": 0.2497878074645996, + "rewards_train/rejected": -3.571498155593872, + "step": 2083 + }, + { + "epoch": 0.58, + "learning_rate": 1.4761912121611652e-07, + "loss": 0.239, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -19.59160041809082, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -2.53125, + "logps_train/rejected": -24.399402618408203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.579472541809082, + "rewards_train/margins": 0.6073427200317383, + "rewards_train/rejected": -2.1868152618408203, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -235.13571166992188, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -249.36477661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.013571739196777, + "rewards_train/margins": 1.0229063034057617, + "rewards_train/rejected": -11.036478042602539, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -263.8045654296875, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -42.577144622802734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.68045711517334, + "rewards_train/margins": -6.672742605209351, + "rewards_train/rejected": -2.0077145099639893, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -128.75531005859375, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -189.01116943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6755311489105225, + "rewards_train/margins": 6.325585603713989, + "rewards_train/rejected": -10.001116752624512, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -45.201236724853516, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -66.81168365478516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5576237440109253, + "rewards_train/margins": -1.5264553781598806, + "rewards_train/rejected": -0.031168365851044655, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -28.32518768310547, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -35.79414367675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7637687921524048, + "rewards_train/margins": 1.1250206232070923, + "rewards_train/rejected": -2.888789415359497, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -19.50499725341797, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -32.88915252685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.431749701499939, + "rewards_train/margins": 1.4102905988693237, + "rewards_train/rejected": -2.8420403003692627, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -34.002235412597656, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -15.31583023071289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9814735651016235, + "rewards_train/margins": -1.1217655539512634, + "rewards_train/rejected": -0.8597080111503601, + "step": 2084 + }, + { + "epoch": 0.58, + "logps_train/chosen": -3.843634605407715, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -31.974212646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.050011541694402695, + "rewards_train/margins": 1.472432877868414, + "rewards_train/rejected": -1.4224213361740112, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -5.945278167724609, + "logps_train/ref_chosen": -0.88671875, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -22.716815948486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5058559775352478, + "rewards_train/margins": 0.559575617313385, + "rewards_train/rejected": -1.0654315948486328, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -26.850841522216797, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -63.592227935791016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5350841283798218, + "rewards_train/margins": -0.20086133480072021, + "rewards_train/rejected": -1.3342227935791016, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -31.650634765625, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -30.514739990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.540063500404358, + "rewards_train/margins": 1.0895355939865112, + "rewards_train/rejected": -2.629599094390869, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.363372802734375, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -16.33670425415039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0300873517990112, + "rewards_train/margins": -0.06829190254211426, + "rewards_train/rejected": -0.961795449256897, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -75.05732727050781, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -146.5372772216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6557327508926392, + "rewards_train/margins": 4.54799497127533, + "rewards_train/rejected": -5.203727722167969, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -32.69160461425781, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -50.257164001464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.206660509109497, + "rewards_train/margins": 1.869055986404419, + "rewards_train/rejected": -3.075716495513916, + "step": 2085 + }, + { + "epoch": 0.58, + "logps_train/chosen": -81.23831176757812, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -102.6203842163086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4238312244415283, + "rewards_train/margins": -0.511792778968811, + "rewards_train/rejected": -1.9120384454727173, + "step": 2085 + }, + { + "epoch": 0.58, + "learning_rate": 1.462386899893474e-07, + "loss": 0.9197, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -57.02476119995117, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -3.953125, + "logps_train/rejected": -41.87287521362305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.2149763107299805, + "rewards_train/margins": -0.4230012893676758, + "rewards_train/rejected": -3.7919750213623047, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -159.04238891601562, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -169.68655395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1542389392852783, + "rewards_train/margins": 1.5144164562225342, + "rewards_train/rejected": -4.6686553955078125, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -176.21572875976562, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -165.52261352539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.921573162078857, + "rewards_train/margins": -0.4193115234375, + "rewards_train/rejected": -7.502261638641357, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -139.59349060058594, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -141.21478271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5093491077423096, + "rewards_train/margins": 0.5121293067932129, + "rewards_train/rejected": -3.0214784145355225, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -118.4913330078125, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -139.1830596923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.949133396148682, + "rewards_train/margins": 1.5191726684570312, + "rewards_train/rejected": -6.468306064605713, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -35.196876525878906, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -34.3814697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5196876525878906, + "rewards_train/margins": 1.4590842723846436, + "rewards_train/rejected": -2.978771924972534, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -119.01925659179688, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -221.88247680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5519256591796875, + "rewards_train/margins": 6.736322402954102, + "rewards_train/rejected": -10.288248062133789, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -0.5726648569107056, + "logps_train/ref_chosen": -1.0859375, + "logps_train/ref_rejected": -3.375, + "logps_train/rejected": -8.904570579528809, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05132726579904556, + "rewards_train/margins": 0.6042843237519264, + "rewards_train/rejected": -0.5529570579528809, + "step": 2086 + }, + { + "epoch": 0.58, + "logps_train/chosen": -22.493492126464844, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -25.399078369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4243491888046265, + "rewards_train/margins": 0.40305864810943604, + "rewards_train/rejected": -1.8274078369140625, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -21.022308349609375, + "logps_train/ref_chosen": -1.1953125, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -33.97575378417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9826996326446533, + "rewards_train/margins": 0.8555006980895996, + "rewards_train/rejected": -2.838200330734253, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -106.22069549560547, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -129.08694458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6720695495605469, + "rewards_train/margins": 0.23662489652633667, + "rewards_train/rejected": -0.9086944460868835, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.69413948059082, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -44.16145324707031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.30691394209861755, + "rewards_train/margins": 1.8967315256595612, + "rewards_train/rejected": -2.2036454677581787, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -79.0797119140625, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -58.0, + "logps_train/rejected": -65.60350036621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5079712271690369, + "rewards_train/margins": 0.25237882137298584, + "rewards_train/rejected": -0.7603500485420227, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -33.608116149902344, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -29.84421157836914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2358115911483765, + "rewards_train/margins": -0.11389040946960449, + "rewards_train/rejected": -1.121921181678772, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -151.68136596679688, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -204.02342224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6181366443634033, + "rewards_train/margins": 3.1842057704925537, + "rewards_train/rejected": -6.802342414855957, + "step": 2087 + }, + { + "epoch": 0.58, + "logps_train/chosen": -20.338726043701172, + "logps_train/ref_chosen": -6.75, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -35.34712219238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.358872652053833, + "rewards_train/margins": 0.6320896148681641, + "rewards_train/rejected": -1.990962266921997, + "step": 2087 + }, + { + "epoch": 0.58, + "learning_rate": 1.4486423418655545e-07, + "loss": 0.4215, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -26.0787353515625, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -46.651824951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.995373547077179, + "rewards_train/margins": 1.2323090434074402, + "rewards_train/rejected": -2.227682590484619, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -165.224853515625, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -182.3432159423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.122485637664795, + "rewards_train/margins": 1.511836051940918, + "rewards_train/rejected": -5.634321689605713, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -51.737274169921875, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -85.5174789428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6737273931503296, + "rewards_train/margins": 4.103020787239075, + "rewards_train/rejected": -5.776748180389404, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -207.554443359375, + "logps_train/ref_chosen": -180.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -196.9961700439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.755444288253784, + "rewards_train/margins": 5.694173097610474, + "rewards_train/rejected": -8.449617385864258, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -33.157920837402344, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -39.5701789855957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9720420837402344, + "rewards_train/margins": 0.8849759101867676, + "rewards_train/rejected": -2.857017993927002, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -14.753339767456055, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -33.008140563964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3768965005874634, + "rewards_train/margins": 1.1520425081253052, + "rewards_train/rejected": -2.5289390087127686, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -9.091593742370605, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -29.34933853149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5372843742370605, + "rewards_train/margins": 1.0351494550704956, + "rewards_train/rejected": -1.5724338293075562, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -16.959272384643555, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -55.342384338378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5146772265434265, + "rewards_train/margins": 0.8445612788200378, + "rewards_train/rejected": -1.3592385053634644, + "step": 2088 + }, + { + "epoch": 0.58, + "logps_train/chosen": -26.243518829345703, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -61.282249450683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29935190081596375, + "rewards_train/margins": 3.65387299656868, + "rewards_train/rejected": -3.9532248973846436, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -27.352807998657227, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -64.5771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9852808117866516, + "rewards_train/margins": 3.8599340319633484, + "rewards_train/rejected": -4.84521484375, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -144.93014526367188, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -185.86373901367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.493014812469482, + "rewards_train/margins": 4.043359279632568, + "rewards_train/rejected": -9.53637409210205, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -11.139437675476074, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -2.921875, + "logps_train/rejected": -13.600568771362305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13269376754760742, + "rewards_train/margins": 0.9351756572723389, + "rewards_train/rejected": -1.0678694248199463, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -209.6328125, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -222.1201934814453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.163281440734863, + "rewards_train/margins": -0.25126218795776367, + "rewards_train/rejected": -6.9120192527771, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -97.47792053222656, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -85.02333068847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3522079586982727, + "rewards_train/margins": 5.692041218280792, + "rewards_train/rejected": -5.3398332595825195, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -13.162727355957031, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -11.184194564819336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42252275347709656, + "rewards_train/margins": 0.02714669704437256, + "rewards_train/rejected": -0.4496694505214691, + "step": 2089 + }, + { + "epoch": 0.58, + "logps_train/chosen": -195.33453369140625, + "logps_train/ref_chosen": -176.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -207.35272216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9334534406661987, + "rewards_train/margins": 1.8018187284469604, + "rewards_train/rejected": -3.735272169113159, + "step": 2089 + }, + { + "epoch": 0.58, + "learning_rate": 1.434957634274746e-07, + "loss": 0.2385, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -28.991378784179688, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -81.47954559326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3616379499435425, + "rewards_train/margins": 6.092566609382629, + "rewards_train/rejected": -7.454204559326172, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -219.42262268066406, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -187.19711303710938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.542262077331543, + "rewards_train/margins": -1.7725505828857422, + "rewards_train/rejected": -6.769711494445801, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -97.13081359863281, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -154.23907470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7130813598632812, + "rewards_train/margins": 1.560826301574707, + "rewards_train/rejected": -4.273907661437988, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -172.1082305908203, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -168.2408905029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3108229637146, + "rewards_train/margins": 3.1132659912109375, + "rewards_train/rejected": -7.424088954925537, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -109.91609191894531, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -131.82199096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.091609239578247, + "rewards_train/margins": 3.2905900478363037, + "rewards_train/rejected": -4.382199287414551, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -20.954282760620117, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -3.8125, + "logps_train/rejected": -24.790884017944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2204283475875854, + "rewards_train/margins": 0.8774100542068481, + "rewards_train/rejected": -2.0978384017944336, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -97.80442810058594, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -98.84033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4804428815841675, + "rewards_train/margins": 0.10359036922454834, + "rewards_train/rejected": -1.5840332508087158, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -76.35340881347656, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -146.11566162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6853408813476562, + "rewards_train/margins": 4.72622537612915, + "rewards_train/rejected": -6.411566257476807, + "step": 2090 + }, + { + "epoch": 0.58, + "logps_train/chosen": -117.36753845214844, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -165.81088256835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5867538452148438, + "rewards_train/margins": 1.7943344116210938, + "rewards_train/rejected": -5.3810882568359375, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -108.81476593017578, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -160.74928283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5814766883850098, + "rewards_train/margins": 4.643452167510986, + "rewards_train/rejected": -8.224928855895996, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -18.915084838867188, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -28.910709381103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0852584838867188, + "rewards_train/margins": 1.5245625972747803, + "rewards_train/rejected": -2.609821081161499, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -63.73974609375, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -79.71646118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.698974609375, + "rewards_train/margins": 0.4226715564727783, + "rewards_train/rejected": -1.1216461658477783, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -180.86033630371094, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -110.17224884033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.586033582687378, + "rewards_train/margins": 0.8311913013458252, + "rewards_train/rejected": -3.417224884033203, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -15.627264022827148, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -30.125, + "logps_train/rejected": -40.26837158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7314764261245728, + "rewards_train/margins": 0.28286075592041016, + "rewards_train/rejected": -1.014337182044983, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -188.8137664794922, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -255.52011108398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.831376552581787, + "rewards_train/margins": 3.820634365081787, + "rewards_train/rejected": -11.652010917663574, + "step": 2091 + }, + { + "epoch": 0.58, + "logps_train/chosen": -44.08147430419922, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -107.3409194946289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.420647382736206, + "rewards_train/margins": 3.338444471359253, + "rewards_train/rejected": -5.759091854095459, + "step": 2091 + }, + { + "epoch": 0.58, + "learning_rate": 1.4213328728994856e-07, + "loss": 0.3152, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -25.275983810424805, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -51.95284652709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0338484048843384, + "rewards_train/margins": 3.6426862478256226, + "rewards_train/rejected": -4.676534652709961, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -20.414810180664062, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -37.665645599365234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9289810061454773, + "rewards_train/margins": 0.737583577632904, + "rewards_train/rejected": -1.6665645837783813, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -87.7969741821289, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -86.72438049316406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5796974301338196, + "rewards_train/margins": -0.1072593629360199, + "rewards_train/rejected": -0.4724380671977997, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -92.48006439208984, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -116.38632202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3980064392089844, + "rewards_train/margins": 1.8906259536743164, + "rewards_train/rejected": -4.288632392883301, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -106.20521545410156, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -7.46875, + "logps_train/rejected": -34.855010986328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.820521593093872, + "rewards_train/margins": -0.08189535140991211, + "rewards_train/rejected": -2.73862624168396, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -91.82222747802734, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -91.66236877441406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7822227478027344, + "rewards_train/margins": -0.015985846519470215, + "rewards_train/rejected": -0.7662369012832642, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -143.2003173828125, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -186.6103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.820031642913818, + "rewards_train/margins": 3.6910033226013184, + "rewards_train/rejected": -8.511034965515137, + "step": 2092 + }, + { + "epoch": 0.58, + "logps_train/chosen": -83.09883117675781, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -92.17511749267578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.609883189201355, + "rewards_train/margins": -0.542371392250061, + "rewards_train/rejected": -1.067511796951294, + "step": 2092 + }, + { + "epoch": 0.59, + "logps_train/chosen": -2.631549119949341, + "logps_train/ref_chosen": -1.375, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -6.222278118133545, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1256549209356308, + "rewards_train/margins": -0.35967710614204407, + "rewards_train/rejected": 0.23402218520641327, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -1.9689226150512695, + "logps_train/ref_chosen": -0.8046875, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -7.345831871032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11642351001501083, + "rewards_train/margins": 0.41815970093011856, + "rewards_train/rejected": -0.5345832109451294, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -1.8498820066452026, + "logps_train/ref_chosen": -0.8984375, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -13.551131248474121, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09514445066452026, + "rewards_train/margins": 0.7880936861038208, + "rewards_train/rejected": -0.8832381367683411, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -42.99119567871094, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -29.548744201660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.944432020187378, + "rewards_train/margins": -1.8895576000213623, + "rewards_train/rejected": -2.0548744201660156, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -153.89852905273438, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -165.1991729736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6898529529571533, + "rewards_train/margins": 4.080064535140991, + "rewards_train/rejected": -5.7699174880981445, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -19.84095001220703, + "logps_train/ref_chosen": -0.2734375, + "logps_train/ref_rejected": -0.2734375, + "logps_train/rejected": -21.18686866760254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9567512273788452, + "rewards_train/margins": 0.1345919370651245, + "rewards_train/rejected": -2.0913431644439697, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -24.95337677001953, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -20.375, + "logps_train/rejected": -44.960052490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.495337724685669, + "rewards_train/margins": 0.963167667388916, + "rewards_train/rejected": -2.458505392074585, + "step": 2093 + }, + { + "epoch": 0.59, + "logps_train/chosen": -75.05712890625, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -173.4161376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0057129859924316, + "rewards_train/margins": 3.9359006881713867, + "rewards_train/rejected": -5.941613674163818, + "step": 2093 + }, + { + "epoch": 0.59, + "learning_rate": 1.4077681530986652e-07, + "loss": 0.5347, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -192.61900329589844, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -256.5954284667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3619003295898438, + "rewards_train/margins": 3.597642421722412, + "rewards_train/rejected": -6.959542751312256, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -185.16580200195312, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -240.6419677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.01658034324646, + "rewards_train/margins": 6.547616243362427, + "rewards_train/rejected": -8.564196586608887, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -88.14881134033203, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -206.52859497070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9148811101913452, + "rewards_train/margins": 7.337978959083557, + "rewards_train/rejected": -9.252860069274902, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -73.32814025878906, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -116.22465515136719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8828140497207642, + "rewards_train/margins": 1.4896515607833862, + "rewards_train/rejected": -2.3724656105041504, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -3.468689441680908, + "logps_train/ref_chosen": -2.46875, + "logps_train/ref_rejected": -1.6015625, + "logps_train/rejected": -3.7791621685028076, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09999394416809082, + "rewards_train/margins": 0.11776602268218994, + "rewards_train/rejected": -0.21775996685028076, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -80.16801452636719, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -210.04043579101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.766801595687866, + "rewards_train/margins": 7.587242364883423, + "rewards_train/rejected": -10.354043960571289, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -35.87839889526367, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -48.47332763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21283988654613495, + "rewards_train/margins": 2.6094929724931717, + "rewards_train/rejected": -2.8223328590393066, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -79.94862365722656, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -78.443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3448623418807983, + "rewards_train/margins": 0.09947359561920166, + "rewards_train/rejected": -1.4443359375, + "step": 2094 + }, + { + "epoch": 0.59, + "logps_train/chosen": -59.34825897216797, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -141.9781036376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0348259210586548, + "rewards_train/margins": 2.862984538078308, + "rewards_train/rejected": -3.897810459136963, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -9.022174835205078, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -30.365610122680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6147174835205078, + "rewards_train/margins": 1.5343434810638428, + "rewards_train/rejected": -2.1490609645843506, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -25.975055694580078, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -20.435230255126953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6600055694580078, + "rewards_train/margins": -0.7789825201034546, + "rewards_train/rejected": -0.8810230493545532, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -192.2071533203125, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -231.60682678222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.720715522766113, + "rewards_train/margins": 1.439967155456543, + "rewards_train/rejected": -8.160682678222656, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -177.96636962890625, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -182.80194091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.696636915206909, + "rewards_train/margins": 3.333557367324829, + "rewards_train/rejected": -6.030194282531738, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -28.08275032043457, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -24.0, + "logps_train/rejected": -52.5313606262207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7082750201225281, + "rewards_train/margins": 2.1448610424995422, + "rewards_train/rejected": -2.8531360626220703, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -130.890869140625, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -158.01065063476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6390869617462158, + "rewards_train/margins": 4.11197829246521, + "rewards_train/rejected": -5.751065254211426, + "step": 2095 + }, + { + "epoch": 0.59, + "logps_train/chosen": -36.68669128417969, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -56.9462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7061691284179688, + "rewards_train/margins": 1.1384599208831787, + "rewards_train/rejected": -2.8446290493011475, + "step": 2095 + }, + { + "epoch": 0.59, + "learning_rate": 1.3942635698109374e-07, + "loss": 0.2278, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -22.482019424438477, + "logps_train/ref_chosen": -10.6875, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -13.479576110839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1794519424438477, + "rewards_train/margins": -0.5533693432807922, + "rewards_train/rejected": -0.6260825991630554, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -66.02314758300781, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -143.9898223876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2273147106170654, + "rewards_train/margins": 1.271667718887329, + "rewards_train/rejected": -4.4989824295043945, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -92.28868103027344, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -146.47637939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22886811196804047, + "rewards_train/margins": 2.718769922852516, + "rewards_train/rejected": -2.9476380348205566, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -70.34917449951172, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -79.6393051147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06508255004882812, + "rewards_train/margins": 1.1790131330490112, + "rewards_train/rejected": -1.113930583000183, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -14.309555053710938, + "logps_train/ref_chosen": -4.9375, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -39.24805450439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9372054934501648, + "rewards_train/margins": 2.5344749093055725, + "rewards_train/rejected": -3.4716804027557373, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -13.20060920715332, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -19.877010345458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.080998420715332, + "rewards_train/margins": 0.3973276615142822, + "rewards_train/rejected": -1.4783260822296143, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -120.88443756103516, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -216.67578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.738443851470947, + "rewards_train/margins": 6.079134464263916, + "rewards_train/rejected": -10.817578315734863, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -95.71870422363281, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -140.50115966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8218704462051392, + "rewards_train/margins": 1.0782455205917358, + "rewards_train/rejected": -2.900115966796875, + "step": 2096 + }, + { + "epoch": 0.59, + "logps_train/chosen": -137.97877502441406, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -239.86717224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8978774547576904, + "rewards_train/margins": 7.338839769363403, + "rewards_train/rejected": -11.236717224121094, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -30.703996658325195, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -64.00265502929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9828996658325195, + "rewards_train/margins": 3.7298660278320312, + "rewards_train/rejected": -4.712765693664551, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -9.153465270996094, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -1.296875, + "logps_train/rejected": -7.482479095458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2534034848213196, + "rewards_train/margins": 0.8719639182090759, + "rewards_train/rejected": -0.6185604333877563, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -57.83746337890625, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -53.135440826416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.233746290206909, + "rewards_train/margins": 0.8297977447509766, + "rewards_train/rejected": -3.0635440349578857, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -16.526229858398438, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -29.375, + "logps_train/rejected": -63.53145980834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6963729858398438, + "rewards_train/margins": 2.719273090362549, + "rewards_train/rejected": -3.4156460762023926, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -23.990314483642578, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -48.88103485107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7740315198898315, + "rewards_train/margins": 2.1453219652175903, + "rewards_train/rejected": -3.919353485107422, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -21.3291015625, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -1.0078125, + "logps_train/rejected": -38.175743103027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8141602277755737, + "rewards_train/margins": 1.9026328325271606, + "rewards_train/rejected": -3.7167930603027344, + "step": 2097 + }, + { + "epoch": 0.59, + "logps_train/chosen": -138.77256774902344, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -119.66099548339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.527256727218628, + "rewards_train/margins": 0.6388428211212158, + "rewards_train/rejected": -3.1660995483398438, + "step": 2097 + }, + { + "epoch": 0.59, + "learning_rate": 1.3808192175540644e-07, + "loss": 0.2462, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -83.40042114257812, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -156.10488891601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5400421619415283, + "rewards_train/margins": 4.6204469203948975, + "rewards_train/rejected": -6.160489082336426, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -189.32061767578125, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -183.98114013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.682061672210693, + "rewards_train/margins": 2.1160521507263184, + "rewards_train/rejected": -8.798113822937012, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -26.329940795898438, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -53.95331573486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9829940795898438, + "rewards_train/margins": 2.299837589263916, + "rewards_train/rejected": -4.28283166885376, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -101.1569595336914, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -54.96609878540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7656960487365723, + "rewards_train/margins": 1.51841402053833, + "rewards_train/rejected": -4.284110069274902, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -17.506912231445312, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -26.531553268432617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.260066270828247, + "rewards_train/margins": 1.0884015560150146, + "rewards_train/rejected": -2.3484678268432617, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -22.040325164794922, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -29.670902252197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8243449926376343, + "rewards_train/margins": 0.5208703279495239, + "rewards_train/rejected": -2.345215320587158, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -183.72727966308594, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -168.7473907470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.872727870941162, + "rewards_train/margins": 2.602011203765869, + "rewards_train/rejected": -8.474739074707031, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -145.75387573242188, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -101.34749603271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4253876209259033, + "rewards_train/margins": 1.0593621730804443, + "rewards_train/rejected": -4.484749794006348, + "step": 2098 + }, + { + "epoch": 0.59, + "logps_train/chosen": -19.85430145263672, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -35.47188949584961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6354301571846008, + "rewards_train/margins": 0.749258816242218, + "rewards_train/rejected": -1.3846889734268188, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -24.01752471923828, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -40.902305603027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9330024719238281, + "rewards_train/margins": 2.4947280883789062, + "rewards_train/rejected": -3.4277305603027344, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -147.21385192871094, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -254.31564331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2713851928710938, + "rewards_train/margins": 5.8601789474487305, + "rewards_train/rejected": -9.131564140319824, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -17.30406379699707, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -43.163299560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5929064154624939, + "rewards_train/margins": 1.0609235167503357, + "rewards_train/rejected": -1.6538299322128296, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -162.4657745361328, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -165.33856201171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7965774536132812, + "rewards_train/margins": -0.26272130012512207, + "rewards_train/rejected": -3.533856153488159, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -10.707932472229004, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -1.78125, + "logps_train/rejected": -26.82232666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7161057591438293, + "rewards_train/margins": 1.7880019545555115, + "rewards_train/rejected": -2.504107713699341, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -38.65827941894531, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -3.71875, + "logps_train/rejected": -22.380584716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.709578037261963, + "rewards_train/margins": -0.8433945178985596, + "rewards_train/rejected": -1.8661835193634033, + "step": 2099 + }, + { + "epoch": 0.59, + "logps_train/chosen": -219.63558959960938, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -202.11505126953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.063559055328369, + "rewards_train/margins": -1.0520539283752441, + "rewards_train/rejected": -6.011505126953125, + "step": 2099 + }, + { + "epoch": 0.59, + "learning_rate": 1.3674351904242608e-07, + "loss": 0.3655, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -42.20058822631836, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -54.319488525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.37005877494812, + "rewards_train/margins": 1.386890172958374, + "rewards_train/rejected": -3.756948947906494, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -103.44766235351562, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -149.6285400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4947662353515625, + "rewards_train/margins": 6.6180877685546875, + "rewards_train/rejected": -7.11285400390625, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -14.994050979614258, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -15.3377046585083, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0244051218032837, + "rewards_train/margins": -0.2906346321105957, + "rewards_train/rejected": -0.733770489692688, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -157.2528076171875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -230.29624938964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.425280809402466, + "rewards_train/margins": 7.304343938827515, + "rewards_train/rejected": -9.72962474822998, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -214.226806640625, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -139.89254760742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.022680759429932, + "rewards_train/margins": -1.5334258079528809, + "rewards_train/rejected": -5.489254951477051, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -93.31246948242188, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -154.40853881835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4312469959259033, + "rewards_train/margins": 1.8096070289611816, + "rewards_train/rejected": -3.240854024887085, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -39.887351989746094, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -49.83955383300781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.279360294342041, + "rewards_train/margins": -0.40790486335754395, + "rewards_train/rejected": -2.871455430984497, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -116.99154663085938, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -41.797508239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0991547107696533, + "rewards_train/margins": 0.9055962562561035, + "rewards_train/rejected": -3.004750967025757, + "step": 2100 + }, + { + "epoch": 0.59, + "logps_train/chosen": -5.055450439453125, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -29.06904411315918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24304504692554474, + "rewards_train/margins": 1.2388593405485153, + "rewards_train/rejected": -1.48190438747406, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -30.8432674407959, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -29.0762939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4093267917633057, + "rewards_train/margins": 0.22955262660980225, + "rewards_train/rejected": -1.638879418373108, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -26.237682342529297, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -36.21775817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8925182819366455, + "rewards_train/margins": 0.6480076313018799, + "rewards_train/rejected": -2.5405259132385254, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -233.8042449951172, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -227.91757202148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.680424690246582, + "rewards_train/margins": 0.2113323211669922, + "rewards_train/rejected": -9.891757011413574, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -107.04131317138672, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -80.32855987548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.404131293296814, + "rewards_train/margins": 0.6787246465682983, + "rewards_train/rejected": -2.0828559398651123, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -164.41726684570312, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -254.24810791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.591726779937744, + "rewards_train/margins": 3.933084011077881, + "rewards_train/rejected": -8.524810791015625, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -18.516847610473633, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -68.8092041015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6891847848892212, + "rewards_train/margins": 2.5917357206344604, + "rewards_train/rejected": -3.2809205055236816, + "step": 2101 + }, + { + "epoch": 0.59, + "logps_train/chosen": -217.91162109375, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -173.57131958007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.191162109375, + "rewards_train/margins": 1.3659701347351074, + "rewards_train/rejected": -6.557132244110107, + "step": 2101 + }, + { + "epoch": 0.59, + "learning_rate": 1.3541115820955284e-07, + "loss": 0.425, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -93.50141143798828, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -96.80885314941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.250141143798828, + "rewards_train/margins": -0.06925582885742188, + "rewards_train/rejected": -2.1808853149414062, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -43.59028625488281, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -2.234375, + "logps_train/rejected": -15.979007720947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.859028697013855, + "rewards_train/margins": -0.4845653772354126, + "rewards_train/rejected": -1.3744633197784424, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -155.9500732421875, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -244.80606079101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.495007276535034, + "rewards_train/margins": 7.285598993301392, + "rewards_train/rejected": -10.780606269836426, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -12.486499786376953, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -22.80492401123047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9455249905586243, + "rewards_train/margins": -0.06503260135650635, + "rewards_train/rejected": -0.8804923892021179, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -106.40367889404297, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -110.81877136230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4403679370880127, + "rewards_train/margins": 0.041509151458740234, + "rewards_train/rejected": -2.481877088546753, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -181.85638427734375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -147.88262939453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.685638427734375, + "rewards_train/margins": -0.34737539291381836, + "rewards_train/rejected": -5.338263034820557, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -13.188203811645508, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -75.53526306152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01882038079202175, + "rewards_train/margins": 5.159706020727754, + "rewards_train/rejected": -5.178526401519775, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -34.29310989379883, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -26.370113372802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2418110370635986, + "rewards_train/margins": 0.8608253002166748, + "rewards_train/rejected": -2.1026363372802734, + "step": 2102 + }, + { + "epoch": 0.59, + "logps_train/chosen": -11.742966651916504, + "logps_train/ref_chosen": -5.09375, + "logps_train/ref_rejected": -3.34375, + "logps_train/rejected": -21.616378784179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6649217009544373, + "rewards_train/margins": 1.1623411774635315, + "rewards_train/rejected": -1.8272628784179688, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -81.60409545898438, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -121.14260864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.035409688949585, + "rewards_train/margins": 1.5788512229919434, + "rewards_train/rejected": -3.6142609119415283, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -1.40863037109375, + "logps_train/ref_chosen": -0.2216796875, + "logps_train/ref_rejected": -0.2216796875, + "logps_train/rejected": -1.1615793704986572, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11869507282972336, + "rewards_train/margins": -0.024705104529857635, + "rewards_train/rejected": -0.09398996829986572, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -102.82909393310547, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -291.1581726074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6329095363616943, + "rewards_train/margins": 10.282907724380493, + "rewards_train/rejected": -12.915817260742188, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -161.96478271484375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -144.02789306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.546478271484375, + "rewards_train/margins": 1.40631103515625, + "rewards_train/rejected": -4.952789306640625, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -45.16904067993164, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -47.57097244262695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5919041037559509, + "rewards_train/margins": 3.30269318819046, + "rewards_train/rejected": -3.894597291946411, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -86.52685546875, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -85.43498229980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.402685642242432, + "rewards_train/margins": -0.10918712615966797, + "rewards_train/rejected": -5.293498516082764, + "step": 2103 + }, + { + "epoch": 0.59, + "logps_train/chosen": -11.679189682006836, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -24.672494888305664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.442918986082077, + "rewards_train/margins": 1.3430805504322052, + "rewards_train/rejected": -1.7859995365142822, + "step": 2103 + }, + { + "epoch": 0.59, + "learning_rate": 1.3408484858189962e-07, + "loss": 0.4211, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -135.92489624023438, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -44.61963653564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3924896717071533, + "rewards_train/margins": -1.0555260181427002, + "rewards_train/rejected": -2.336963653564453, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -12.526866912841797, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -22.131267547607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5589367151260376, + "rewards_train/margins": 0.9041900634765625, + "rewards_train/rejected": -1.4631267786026, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -72.24604797363281, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -93.42597198486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.674604892730713, + "rewards_train/margins": 1.9179925918579102, + "rewards_train/rejected": -4.592597484588623, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -15.810619354248047, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -72.07072448730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9873119592666626, + "rewards_train/margins": 3.769760489463806, + "rewards_train/rejected": -4.757072448730469, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -49.61290740966797, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -45.83995056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6112908124923706, + "rewards_train/margins": 0.785204291343689, + "rewards_train/rejected": -2.3964951038360596, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -26.082717895507812, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -25.148347854614258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7582718133926392, + "rewards_train/margins": 1.1003129482269287, + "rewards_train/rejected": -1.8585847616195679, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -30.481534957885742, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -106.23234558105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4481534957885742, + "rewards_train/margins": 3.2000813484191895, + "rewards_train/rejected": -4.648234844207764, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -17.45822525024414, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -55.75, + "logps_train/rejected": -85.48886108398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.695822536945343, + "rewards_train/margins": 2.278063714504242, + "rewards_train/rejected": -2.973886251449585, + "step": 2104 + }, + { + "epoch": 0.59, + "logps_train/chosen": -122.18909454345703, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -146.65618896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3189094066619873, + "rewards_train/margins": 1.7967097759246826, + "rewards_train/rejected": -4.11561918258667, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -70.98661804199219, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -85.63969421386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6486617922782898, + "rewards_train/margins": 1.6653075814247131, + "rewards_train/rejected": -2.313969373703003, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -97.1907730102539, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -199.06304931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.119077444076538, + "rewards_train/margins": 4.437227487564087, + "rewards_train/rejected": -7.556304931640625, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -23.794189453125, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -66.48780822753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9512939453125, + "rewards_train/margins": 1.472486972808838, + "rewards_train/rejected": -3.423780918121338, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -148.23406982421875, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -101.43556213378906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.72340726852417, + "rewards_train/margins": -0.2048511505126953, + "rewards_train/rejected": -4.518556118011475, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -72.02291870117188, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -120.18682861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2272918224334717, + "rewards_train/margins": 0.09139108657836914, + "rewards_train/rejected": -3.318682909011841, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -12.055878639221191, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -2.0625, + "logps_train/rejected": -26.982404708862305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9680878520011902, + "rewards_train/margins": 1.523902714252472, + "rewards_train/rejected": -2.491990566253662, + "step": 2105 + }, + { + "epoch": 0.59, + "logps_train/chosen": -102.69230651855469, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -157.25103759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7692307233810425, + "rewards_train/margins": 3.205873131752014, + "rewards_train/rejected": -4.975103855133057, + "step": 2105 + }, + { + "epoch": 0.59, + "learning_rate": 1.3276459944222784e-07, + "loss": 0.3042, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -66.6263198852539, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -108.96369934082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4876320362091064, + "rewards_train/margins": 3.8087379932403564, + "rewards_train/rejected": -6.296370029449463, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -8.136075973510742, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -28.78643798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34173259139060974, + "rewards_train/margins": 1.718161255121231, + "rewards_train/rejected": -2.059893846511841, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -25.31474494934082, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -41.28728485107422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4002245664596558, + "rewards_train/margins": 2.1472538709640503, + "rewards_train/rejected": -3.547478437423706, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -6.334264755249023, + "logps_train/ref_chosen": -0.99609375, + "logps_train/ref_rejected": -1.203125, + "logps_train/rejected": -3.2770681381225586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5338171124458313, + "rewards_train/margins": -0.3264227956533432, + "rewards_train/rejected": -0.2073943167924881, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -15.806844711303711, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -21.5351619720459, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.006815528962761164, + "rewards_train/margins": 1.1790817738510668, + "rewards_train/rejected": -1.1722662448883057, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -10.985363960266113, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -40.59025573730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8594738841056824, + "rewards_train/margins": 1.3495517373085022, + "rewards_train/rejected": -2.2090256214141846, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -44.0274658203125, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -6.1875, + "logps_train/rejected": -45.11273956298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.458996534347534, + "rewards_train/margins": 0.43352746963500977, + "rewards_train/rejected": -3.892524003982544, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -56.64336395263672, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -64.15899658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.451836347579956, + "rewards_train/margins": 1.001563310623169, + "rewards_train/rejected": -4.453399658203125, + "step": 2106 + }, + { + "epoch": 0.59, + "logps_train/chosen": -67.17292785644531, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -109.26348876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3172927796840668, + "rewards_train/margins": 1.5090561211109161, + "rewards_train/rejected": -1.826348900794983, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -96.9688720703125, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -21.187728881835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3968873023986816, + "rewards_train/margins": -0.9093643426895142, + "rewards_train/rejected": -1.4875229597091675, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -69.83518981933594, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -69.84918212890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06648101657629013, + "rewards_train/margins": 0.0013992264866828918, + "rewards_train/rejected": 0.06508179008960724, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -35.634822845458984, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -51.014400482177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9634823203086853, + "rewards_train/margins": 0.8129577040672302, + "rewards_train/rejected": -1.7764400243759155, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -15.765000343322754, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -33.93539047241211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5265000462532043, + "rewards_train/margins": 0.9045390486717224, + "rewards_train/rejected": -1.4310390949249268, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -141.59107971191406, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -129.82981872558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35910797119140625, + "rewards_train/margins": 1.0238739252090454, + "rewards_train/rejected": -1.3829818964004517, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -76.42442321777344, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -98.4879379272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0924423933029175, + "rewards_train/margins": 1.1063514947891235, + "rewards_train/rejected": -2.198793888092041, + "step": 2107 + }, + { + "epoch": 0.59, + "logps_train/chosen": -109.52558898925781, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -235.34683227539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5525588989257812, + "rewards_train/margins": 5.58212423324585, + "rewards_train/rejected": -6.134683132171631, + "step": 2107 + }, + { + "epoch": 0.59, + "learning_rate": 1.3145042003088114e-07, + "loss": 0.371, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -46.45888900756836, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -45.843849182128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8396389484405518, + "rewards_train/margins": -0.09275388717651367, + "rewards_train/rejected": -3.746885061264038, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -206.9920654296875, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -232.07980346679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.74920654296875, + "rewards_train/margins": -1.0412263870239258, + "rewards_train/rejected": -8.707980155944824, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -22.086515426635742, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -29.168964385986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.680526614189148, + "rewards_train/margins": 0.6019948720932007, + "rewards_train/rejected": -2.2825214862823486, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -58.321067810058594, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -44.86474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7071068286895752, + "rewards_train/margins": 1.1293678283691406, + "rewards_train/rejected": -2.836474657058716, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -4.42313289642334, + "logps_train/ref_chosen": -2.234375, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -12.878728866577148, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21887579560279846, + "rewards_train/margins": 0.2564970850944519, + "rewards_train/rejected": -0.47537288069725037, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -23.274333953857422, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -26.550260543823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17743340134620667, + "rewards_train/margins": 1.4650926291942596, + "rewards_train/rejected": -1.6425260305404663, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -88.99510192871094, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -113.47476959228516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1495102643966675, + "rewards_train/margins": 0.7479667663574219, + "rewards_train/rejected": -1.8974770307540894, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -33.077674865722656, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -32.393028259277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7827675342559814, + "rewards_train/margins": -0.12471473217010498, + "rewards_train/rejected": -1.6580528020858765, + "step": 2108 + }, + { + "epoch": 0.59, + "logps_train/chosen": -200.32638549804688, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -200.86404418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.2826385498046875, + "rewards_train/margins": 0.2537660598754883, + "rewards_train/rejected": -7.536404609680176, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -24.750858306884766, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -55.75040054321289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8938358426094055, + "rewards_train/margins": 2.5562042593955994, + "rewards_train/rejected": -3.450040102005005, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -339.9665222167969, + "logps_train/ref_chosen": -251.0, + "logps_train/ref_rejected": -246.0, + "logps_train/rejected": -369.4173278808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.896652221679688, + "rewards_train/margins": 3.4450807571411133, + "rewards_train/rejected": -12.3417329788208, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -38.725807189941406, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -53.82197570800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.835080862045288, + "rewards_train/margins": 0.34711670875549316, + "rewards_train/rejected": -3.1821975708007812, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -1.1977081298828125, + "logps_train/ref_chosen": -0.9140625, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -13.21639633178711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.02836456336081028, + "rewards_train/margins": -0.025474930182099342, + "rewards_train/rejected": -0.0028896331787109375, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -151.98101806640625, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -253.28378295898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.448101758956909, + "rewards_train/margins": 7.780276536941528, + "rewards_train/rejected": -11.228378295898438, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -99.84809875488281, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -168.049560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.434809923171997, + "rewards_train/margins": 5.570146322250366, + "rewards_train/rejected": -7.004956245422363, + "step": 2109 + }, + { + "epoch": 0.59, + "logps_train/chosen": -165.98214721679688, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -114.40048217773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9982147216796875, + "rewards_train/margins": -1.55816650390625, + "rewards_train/rejected": -2.4400482177734375, + "step": 2109 + }, + { + "epoch": 0.59, + "learning_rate": 1.3014231954572286e-07, + "loss": 0.525, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -104.39531707763672, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -182.11941528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5395317077636719, + "rewards_train/margins": 3.3724098205566406, + "rewards_train/rejected": -4.9119415283203125, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -7.620229244232178, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -28.168533325195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13702292740345, + "rewards_train/margins": 1.6923304051160812, + "rewards_train/rejected": -1.8293533325195312, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -70.09640502929688, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -126.90819549560547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34035950899124146, + "rewards_train/margins": 2.1311790347099304, + "rewards_train/rejected": -1.790819525718689, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -115.50985717773438, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -160.90994262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5509857535362244, + "rewards_train/margins": 0.6400085091590881, + "rewards_train/rejected": -1.1909942626953125, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -158.2552490234375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -210.93862915039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.375524997711182, + "rewards_train/margins": 5.018337726593018, + "rewards_train/rejected": -9.3938627243042, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -162.93405151367188, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -238.05149841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.143405437469482, + "rewards_train/margins": 6.361744403839111, + "rewards_train/rejected": -10.505149841308594, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -30.515024185180664, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -54.13779067993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3265024423599243, + "rewards_train/margins": 1.4872766733169556, + "rewards_train/rejected": -2.81377911567688, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -199.51773071289062, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -182.09136962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.151773452758789, + "rewards_train/margins": -1.2426362037658691, + "rewards_train/rejected": -6.90913724899292, + "step": 2110 + }, + { + "epoch": 0.59, + "logps_train/chosen": -65.40509796142578, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -35.578514099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2405097484588623, + "rewards_train/margins": 0.05952930450439453, + "rewards_train/rejected": -3.300039052963257, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -6.335141658782959, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -31.0, + "logps_train/rejected": -56.12834930419922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18976417183876038, + "rewards_train/margins": 2.323070853948593, + "rewards_train/rejected": -2.5128350257873535, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -35.63705062866211, + "logps_train/ref_chosen": -1.734375, + "logps_train/ref_rejected": -2.390625, + "logps_train/rejected": -41.44242858886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3902676105499268, + "rewards_train/margins": 0.5149128437042236, + "rewards_train/rejected": -3.9051804542541504, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -328.8770446777344, + "logps_train/ref_chosen": -246.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -319.097900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.287704467773438, + "rewards_train/margins": 10.322086334228516, + "rewards_train/rejected": -18.609790802001953, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -28.03062629699707, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -12.4375, + "logps_train/rejected": -41.69770812988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8093127012252808, + "rewards_train/margins": 1.1167081594467163, + "rewards_train/rejected": -2.926020860671997, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -136.77471923828125, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -235.24386596679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.427471876144409, + "rewards_train/margins": 5.996915102005005, + "rewards_train/rejected": -9.424386978149414, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -34.026397705078125, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -35.20249557495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7151397466659546, + "rewards_train/margins": 0.34885990619659424, + "rewards_train/rejected": -2.063999652862549, + "step": 2111 + }, + { + "epoch": 0.59, + "logps_train/chosen": -27.092426300048828, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -52.57017517089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2748677730560303, + "rewards_train/margins": 2.2071497440338135, + "rewards_train/rejected": -4.482017517089844, + "step": 2111 + }, + { + "epoch": 0.59, + "learning_rate": 1.2884030714206872e-07, + "loss": 0.2874, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -83.70832824707031, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -83.71878051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9958328008651733, + "rewards_train/margins": 0.00104522705078125, + "rewards_train/rejected": -1.9968780279159546, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -113.62322998046875, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -193.32275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3123230934143066, + "rewards_train/margins": 3.8699522018432617, + "rewards_train/rejected": -7.182275295257568, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -162.44039916992188, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -184.84710693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6440398693084717, + "rewards_train/margins": 3.8906710147857666, + "rewards_train/rejected": -7.534710884094238, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -17.39154624938965, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -20.566850662231445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.023345375433564186, + "rewards_train/margins": 0.7362804654985666, + "rewards_train/rejected": -0.7129350900650024, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -14.297006607055664, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -46.220855712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7047006487846375, + "rewards_train/margins": 2.886134922504425, + "rewards_train/rejected": -3.5908355712890625, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -214.802734375, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -238.003173828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.080273628234863, + "rewards_train/margins": -0.0799560546875, + "rewards_train/rejected": -8.000317573547363, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -6.694636821746826, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -15.299559593200684, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3804011940956116, + "rewards_train/margins": -0.46294523775577545, + "rewards_train/rejected": 0.08254404366016388, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -118.8661880493164, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -216.7931365966797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5866188406944275, + "rewards_train/margins": 6.992694914340973, + "rewards_train/rejected": -7.5793137550354, + "step": 2112 + }, + { + "epoch": 0.59, + "logps_train/chosen": -157.28953552246094, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -155.26107788085938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.778953552246094, + "rewards_train/margins": -0.3528456687927246, + "rewards_train/rejected": -5.426107883453369, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -14.710671424865723, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -20.544788360595703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4648171365261078, + "rewards_train/margins": -0.047838300466537476, + "rewards_train/rejected": -0.4169788360595703, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -5.020845413208008, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -13.0, + "logps_train/rejected": -15.342084884643555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17552204430103302, + "rewards_train/margins": 0.058686450123786926, + "rewards_train/rejected": -0.23420849442481995, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -9.282508850097656, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -0.9453125, + "logps_train/rejected": -2.7016818523406982, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0032508850563317537, + "rewards_train/margins": 0.17238604719750583, + "rewards_train/rejected": -0.17563693225383759, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -20.80219268798828, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -48.23619842529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49896928668022156, + "rewards_train/margins": 1.7746506035327911, + "rewards_train/rejected": -2.2736198902130127, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -137.46661376953125, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -92.91571044921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.146661281585693, + "rewards_train/margins": -1.6050901412963867, + "rewards_train/rejected": -2.5415711402893066, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -175.649658203125, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -178.18569946289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.164965867996216, + "rewards_train/margins": 2.40360426902771, + "rewards_train/rejected": -5.568570137023926, + "step": 2113 + }, + { + "epoch": 0.59, + "logps_train/chosen": -12.475117683410645, + "logps_train/ref_chosen": -1.90625, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -26.363889694213867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0568867921829224, + "rewards_train/margins": 1.2920023202896118, + "rewards_train/rejected": -2.348889112472534, + "step": 2113 + }, + { + "epoch": 0.59, + "learning_rate": 1.2754439193262545e-07, + "loss": 0.5011, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -96.24778747558594, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -137.11227416992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0747787952423096, + "rewards_train/margins": 3.0364487171173096, + "rewards_train/rejected": -4.111227512359619, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -200.76702880859375, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -164.02044677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5767029523849487, + "rewards_train/margins": 2.0253418684005737, + "rewards_train/rejected": -3.6020448207855225, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -103.21285247802734, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -182.8134002685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.721285343170166, + "rewards_train/margins": 6.710054874420166, + "rewards_train/rejected": -10.431340217590332, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -24.324230194091797, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -42.98523712158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9324230551719666, + "rewards_train/margins": 2.1161006093025208, + "rewards_train/rejected": -3.0485236644744873, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -190.13160705566406, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -241.11068725585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.913160800933838, + "rewards_train/margins": 3.997908115386963, + "rewards_train/rejected": -9.9110689163208, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -204.39932250976562, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -191.88729858398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.7899322509765625, + "rewards_train/margins": -0.8012022972106934, + "rewards_train/rejected": -6.988729953765869, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -114.37158966064453, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -141.43594360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6871589422225952, + "rewards_train/margins": 3.4564353227615356, + "rewards_train/rejected": -5.143594264984131, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -228.6056671142578, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -249.87767028808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.760566711425781, + "rewards_train/margins": 2.127200126647949, + "rewards_train/rejected": -8.88776683807373, + "step": 2114 + }, + { + "epoch": 0.59, + "logps_train/chosen": -42.61631774902344, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -34.521522521972656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5085067749023438, + "rewards_train/margins": -0.4844794273376465, + "rewards_train/rejected": -3.0240273475646973, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -157.47227478027344, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -183.0087432861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.847227573394775, + "rewards_train/margins": 5.153646945953369, + "rewards_train/rejected": -11.000874519348145, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -13.015568733215332, + "logps_train/ref_chosen": -4.0, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -72.39037322998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9015569090843201, + "rewards_train/margins": 1.1624804139137268, + "rewards_train/rejected": -2.064037322998047, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -6.841484546661377, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -1.90625, + "logps_train/rejected": -15.628273963928223, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.052898455411195755, + "rewards_train/margins": 1.3193039409816265, + "rewards_train/rejected": -1.3722023963928223, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -182.47744750976562, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -282.69268798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.647744655609131, + "rewards_train/margins": 7.321524143218994, + "rewards_train/rejected": -11.969268798828125, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -70.45684051513672, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -76.02597045898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4206840991973877, + "rewards_train/margins": 1.806913137435913, + "rewards_train/rejected": -5.227597236633301, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -112.8432846069336, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -137.20001220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5343284606933594, + "rewards_train/margins": 2.535672664642334, + "rewards_train/rejected": -5.070001125335693, + "step": 2115 + }, + { + "epoch": 0.59, + "logps_train/chosen": -112.47071838378906, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -154.6520233154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9470717906951904, + "rewards_train/margins": 3.1181304454803467, + "rewards_train/rejected": -7.065202236175537, + "step": 2115 + }, + { + "epoch": 0.59, + "learning_rate": 1.2625458298742586e-07, + "loss": 0.2107, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -114.93571472167969, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -53.13616180419922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5435714721679688, + "rewards_train/margins": -0.36745524406433105, + "rewards_train/rejected": -3.1761162281036377, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -19.126379013061523, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -31.97358512878418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9751378893852234, + "rewards_train/margins": 1.3409705758094788, + "rewards_train/rejected": -2.316108465194702, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -142.17288208007812, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -259.88690185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.367288112640381, + "rewards_train/margins": 6.621402263641357, + "rewards_train/rejected": -10.988690376281738, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -25.869739532470703, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -31.724388122558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6869739294052124, + "rewards_train/margins": 0.6729649305343628, + "rewards_train/rejected": -2.359938859939575, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -147.37429809570312, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -153.01116943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7374298572540283, + "rewards_train/margins": 1.9136872291564941, + "rewards_train/rejected": -3.6511170864105225, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -83.62284088134766, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -154.9709014892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16228409111499786, + "rewards_train/margins": 5.334806248545647, + "rewards_train/rejected": -5.4970903396606445, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -100.41285705566406, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -114.37255859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9412857294082642, + "rewards_train/margins": 2.3959702253341675, + "rewards_train/rejected": -4.337255954742432, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -1.4823887348175049, + "logps_train/ref_chosen": -1.515625, + "logps_train/ref_rejected": -1.59375, + "logps_train/rejected": -1.8216869831085205, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.003323626471683383, + "rewards_train/margins": 0.026117325527593493, + "rewards_train/rejected": -0.02279369905591011, + "step": 2116 + }, + { + "epoch": 0.59, + "logps_train/chosen": -37.82371520996094, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -34.090476989746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2948715686798096, + "rewards_train/margins": 1.251676082611084, + "rewards_train/rejected": -2.5465476512908936, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -118.8084487915039, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -128.91961669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.630844831466675, + "rewards_train/margins": 2.361117124557495, + "rewards_train/rejected": -4.99196195602417, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -137.54336547851562, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -155.24560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6043365001678467, + "rewards_train/margins": 0.5202240943908691, + "rewards_train/rejected": -3.124560594558716, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -124.74532318115234, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -213.5318603515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.824532389640808, + "rewards_train/margins": 4.72865355014801, + "rewards_train/rejected": -6.553185939788818, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -49.97358703613281, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -48.94733428955078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5223587155342102, + "rewards_train/margins": -0.1026252806186676, + "rewards_train/rejected": -0.4197334349155426, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -12.629961013793945, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -21.041601181030273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9208086133003235, + "rewards_train/margins": 0.2771015763282776, + "rewards_train/rejected": -1.197910189628601, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -35.00286102294922, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -36.08994674682617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.237786054611206, + "rewards_train/margins": -0.6162914037704468, + "rewards_train/rejected": -1.6214946508407593, + "step": 2117 + }, + { + "epoch": 0.59, + "logps_train/chosen": -95.14720153808594, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -185.32000732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2852798402309418, + "rewards_train/margins": 7.067280858755112, + "rewards_train/rejected": -6.78200101852417, + "step": 2117 + }, + { + "epoch": 0.59, + "learning_rate": 1.2497088933376477e-07, + "loss": 0.352, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -101.61502838134766, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -168.00875854492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4615029096603394, + "rewards_train/margins": 4.639372944831848, + "rewards_train/rejected": -6.1008758544921875, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -115.13954162597656, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -124.08243560791016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8639541864395142, + "rewards_train/margins": 1.0942894220352173, + "rewards_train/rejected": -2.9582436084747314, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -134.08633422851562, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -211.79022216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.108633518218994, + "rewards_train/margins": 4.820389270782471, + "rewards_train/rejected": -8.929022789001465, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -21.452144622802734, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -38.489585876464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03271446377038956, + "rewards_train/margins": 1.803744100034237, + "rewards_train/rejected": -1.8364585638046265, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -92.57404327392578, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -185.5199737548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2574043273925781, + "rewards_train/margins": 8.094593048095703, + "rewards_train/rejected": -9.351997375488281, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -16.633249282836914, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -2.421875, + "logps_train/rejected": -13.901432037353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6195749640464783, + "rewards_train/margins": 0.528380811214447, + "rewards_train/rejected": -1.1479557752609253, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -65.29242706298828, + "logps_train/ref_chosen": -40.25, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -155.70779418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5042426586151123, + "rewards_train/margins": 4.166537046432495, + "rewards_train/rejected": -6.670779705047607, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -35.97394561767578, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -80.35920715332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.478644609451294, + "rewards_train/margins": 1.0072760581970215, + "rewards_train/rejected": -3.4859206676483154, + "step": 2118 + }, + { + "epoch": 0.59, + "logps_train/chosen": -38.962589263916016, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -39.937320709228516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.052509069442749, + "rewards_train/margins": -0.52752685546875, + "rewards_train/rejected": -2.524982213973999, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -112.25725555419922, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -177.64862060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.725725531578064, + "rewards_train/margins": 3.489136815071106, + "rewards_train/rejected": -5.21486234664917, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -123.00970458984375, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -145.58114624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9509705305099487, + "rewards_train/margins": 3.807144284248352, + "rewards_train/rejected": -5.758114814758301, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -21.51789093017578, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -62.970455169677734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11428909748792648, + "rewards_train/margins": 2.3577565625309944, + "rewards_train/rejected": -2.472045660018921, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -84.88079071044922, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -220.6192169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23807907104492188, + "rewards_train/margins": 4.923842906951904, + "rewards_train/rejected": -5.161921977996826, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -8.819785118103027, + "logps_train/ref_chosen": -0.64453125, + "logps_train/ref_rejected": -0.64453125, + "logps_train/rejected": -9.053184509277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8175253868103027, + "rewards_train/margins": 0.023339927196502686, + "rewards_train/rejected": -0.8408653140068054, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -118.1651840209961, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -182.16334533691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4165184497833252, + "rewards_train/margins": 4.349816083908081, + "rewards_train/rejected": -5.766334533691406, + "step": 2119 + }, + { + "epoch": 0.59, + "logps_train/chosen": -20.02033042907715, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -31.482481002807617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8270330429077148, + "rewards_train/margins": -0.4162849485874176, + "rewards_train/rejected": -0.41074809432029724, + "step": 2119 + }, + { + "epoch": 0.59, + "learning_rate": 1.2369331995613663e-07, + "loss": 0.2504, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -103.53350067138672, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -159.35906982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5533500909805298, + "rewards_train/margins": 2.73255717754364, + "rewards_train/rejected": -4.28590726852417, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -98.23027038574219, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -116.55284881591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.723026990890503, + "rewards_train/margins": -0.7177419662475586, + "rewards_train/rejected": -2.0052850246429443, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -25.633031845092773, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -44.51703643798828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6883032321929932, + "rewards_train/margins": -0.14909958839416504, + "rewards_train/rejected": -1.5392036437988281, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -101.306396484375, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -195.2223358154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7806396484375, + "rewards_train/margins": 4.641593933105469, + "rewards_train/rejected": -6.422233581542969, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -16.847122192382812, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -20.938467025756836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7409622073173523, + "rewards_train/margins": 0.3903844952583313, + "rewards_train/rejected": -1.1313467025756836, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -114.47090148925781, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -124.40062713623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.897090196609497, + "rewards_train/margins": 3.7429726123809814, + "rewards_train/rejected": -5.6400628089904785, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -165.81048583984375, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -122.99165344238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.081048607826233, + "rewards_train/margins": 1.1681166887283325, + "rewards_train/rejected": -2.2491652965545654, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -158.10752868652344, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -219.19384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.010753154754639, + "rewards_train/margins": 2.9086318016052246, + "rewards_train/rejected": -6.919384956359863, + "step": 2120 + }, + { + "epoch": 0.59, + "logps_train/chosen": -26.980222702026367, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -1.7265625, + "logps_train/rejected": -19.28731346130371, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8855223059654236, + "rewards_train/margins": 0.8705528378486633, + "rewards_train/rejected": -1.756075143814087, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -22.731964111328125, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -18.46169090270996, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8075714111328125, + "rewards_train/margins": -0.7676522731781006, + "rewards_train/rejected": -1.039919137954712, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -85.87873840332031, + "logps_train/ref_chosen": -55.75, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -149.25396728515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.012873888015747, + "rewards_train/margins": 3.2125227451324463, + "rewards_train/rejected": -6.225396633148193, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -94.6951904296875, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -117.70834350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.019519090652466, + "rewards_train/margins": 0.20131540298461914, + "rewards_train/rejected": -3.220834493637085, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -173.83047485351562, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -182.29580688476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.383047580718994, + "rewards_train/margins": 1.946533203125, + "rewards_train/rejected": -6.329580783843994, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -5.677402973175049, + "logps_train/ref_chosen": -3.890625, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -3.46425199508667, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17867779731750488, + "rewards_train/margins": -0.11506509780883789, + "rewards_train/rejected": -0.06361269950866699, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -167.947021484375, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -225.22329711914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.994702100753784, + "rewards_train/margins": 7.227627992630005, + "rewards_train/rejected": -10.222330093383789, + "step": 2121 + }, + { + "epoch": 0.59, + "logps_train/chosen": -249.67294311523438, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -283.82623291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -13.1672945022583, + "rewards_train/margins": 1.2153291702270508, + "rewards_train/rejected": -14.382623672485352, + "step": 2121 + }, + { + "epoch": 0.59, + "learning_rate": 1.2242188379617236e-07, + "loss": 0.3816, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -233.55320739746094, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -255.69168090820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.355320930480957, + "rewards_train/margins": 3.413846969604492, + "rewards_train/rejected": -11.76916790008545, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -78.61050415039062, + "logps_train/ref_chosen": -54.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -200.79794311523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.41105055809021, + "rewards_train/margins": 5.118743658065796, + "rewards_train/rejected": -7.529794216156006, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -2.093040943145752, + "logps_train/ref_chosen": -1.46875, + "logps_train/ref_rejected": -1.75, + "logps_train/rejected": -1.1887006759643555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.062429096549749374, + "rewards_train/margins": -0.11855902895331383, + "rewards_train/rejected": 0.05612993240356445, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -35.15575408935547, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -2.40625, + "logps_train/rejected": -36.59618377685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8093254566192627, + "rewards_train/margins": 0.6096680164337158, + "rewards_train/rejected": -3.4189934730529785, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -85.86640167236328, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -113.7083740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1366403102874756, + "rewards_train/margins": 1.8341970443725586, + "rewards_train/rejected": -3.970837354660034, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -231.6414794921875, + "logps_train/ref_chosen": -200.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -215.56529235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1641480922698975, + "rewards_train/margins": 2.092381238937378, + "rewards_train/rejected": -5.256529331207275, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -24.209016799926758, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -32.076324462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1021517515182495, + "rewards_train/margins": 0.6179807186126709, + "rewards_train/rejected": -1.7201324701309204, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -89.93601989746094, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -127.64537048339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19360199570655823, + "rewards_train/margins": 4.720934957265854, + "rewards_train/rejected": -4.914536952972412, + "step": 2122 + }, + { + "epoch": 0.59, + "logps_train/chosen": -138.7259979248047, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -168.72288513183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.272599935531616, + "rewards_train/margins": 2.849688768386841, + "rewards_train/rejected": -5.122288703918457, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -178.4027557373047, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -222.200439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2402756214141846, + "rewards_train/margins": 2.279768228530884, + "rewards_train/rejected": -5.520043849945068, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -63.10765838623047, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -18.991731643676758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.879516124725342, + "rewards_train/margins": -3.61784291267395, + "rewards_train/rejected": -1.2616732120513916, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -10.192737579345703, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -5.375, + "logps_train/rejected": -25.475841522216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6442737579345703, + "rewards_train/margins": 1.3658103942871094, + "rewards_train/rejected": -2.0100841522216797, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -41.24262619018555, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -37.75007247924805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5117626190185547, + "rewards_train/margins": 0.4851195812225342, + "rewards_train/rejected": -2.996882200241089, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -120.55744171142578, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -130.76429748535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6557443141937256, + "rewards_train/margins": 2.6706855297088623, + "rewards_train/rejected": -5.326429843902588, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -10.726076126098633, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -11.653868675231934, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8101076483726501, + "rewards_train/margins": -0.21034574508666992, + "rewards_train/rejected": -0.5997619032859802, + "step": 2123 + }, + { + "epoch": 0.59, + "logps_train/chosen": -142.25296020507812, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -167.20809936523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6752960681915283, + "rewards_train/margins": 2.1455137729644775, + "rewards_train/rejected": -4.820809841156006, + "step": 2123 + }, + { + "epoch": 0.59, + "learning_rate": 1.2115658975257736e-07, + "loss": 0.4635, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -210.8755645751953, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -265.23175048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.687556743621826, + "rewards_train/margins": 5.135618686676025, + "rewards_train/rejected": -12.823175430297852, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -34.926795959472656, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -29.717178344726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8801796436309814, + "rewards_train/margins": 0.22903823852539062, + "rewards_train/rejected": -2.109217882156372, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -104.3327865600586, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -121.54074096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6332786083221436, + "rewards_train/margins": 1.370795488357544, + "rewards_train/rejected": -5.0040740966796875, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -23.572521209716797, + "logps_train/ref_chosen": -1.3828125, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -120.26943969726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.218971014022827, + "rewards_train/margins": 2.3079731464385986, + "rewards_train/rejected": -4.526944160461426, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -114.87910461425781, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -158.5975341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3879106044769287, + "rewards_train/margins": 4.071842908859253, + "rewards_train/rejected": -7.459753513336182, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -26.47977638244629, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -43.308170318603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7729776501655579, + "rewards_train/margins": 1.9640893340110779, + "rewards_train/rejected": -2.7370669841766357, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -13.55370807647705, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -24.89011001586914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3178708255290985, + "rewards_train/margins": 0.9586402475833893, + "rewards_train/rejected": -1.2765110731124878, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -76.1339111328125, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -90.37382507324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.613391101360321, + "rewards_train/margins": 0.023991405963897705, + "rewards_train/rejected": -0.6373825073242188, + "step": 2124 + }, + { + "epoch": 0.59, + "logps_train/chosen": -217.7003631591797, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -215.1631622314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.070036888122559, + "rewards_train/margins": 0.24627971649169922, + "rewards_train/rejected": -8.316316604614258, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -6.466790199279785, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -30.471912384033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1158209815621376, + "rewards_train/margins": 1.9817622676491737, + "rewards_train/rejected": -1.8659412860870361, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -207.11807250976562, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -145.74163818359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.311807155609131, + "rewards_train/margins": -2.237643241882324, + "rewards_train/rejected": -4.074163913726807, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -131.56967163085938, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -134.3108673095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.356967449188232, + "rewards_train/margins": 0.6241192817687988, + "rewards_train/rejected": -4.981086730957031, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -125.5660171508789, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -135.28778076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3066017627716064, + "rewards_train/margins": 2.8721764087677, + "rewards_train/rejected": -6.178778171539307, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -167.7064971923828, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -176.511474609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.270649909973145, + "rewards_train/margins": -0.11950206756591797, + "rewards_train/rejected": -8.151147842407227, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -110.02653503417969, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -90.6014633178711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7526535987854004, + "rewards_train/margins": -0.5925071239471436, + "rewards_train/rejected": -2.160146474838257, + "step": 2125 + }, + { + "epoch": 0.59, + "logps_train/chosen": -11.869112014770508, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -11.068276405334473, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1869112104177475, + "rewards_train/margins": -0.048833563923835754, + "rewards_train/rejected": -0.13807764649391174, + "step": 2125 + }, + { + "epoch": 0.59, + "learning_rate": 1.198974466810686e-07, + "loss": 0.5062, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -10.342424392700195, + "logps_train/ref_chosen": -9.0625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -10.623964309692383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1279924362897873, + "rewards_train/margins": 0.503153994679451, + "rewards_train/rejected": -0.6311464309692383, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -129.13934326171875, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -183.46087646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.313934326171875, + "rewards_train/margins": 4.632153511047363, + "rewards_train/rejected": -5.946087837219238, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -14.239802360534668, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -11.699308395385742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8271052241325378, + "rewards_train/margins": -0.44467437267303467, + "rewards_train/rejected": -0.3824308514595032, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -90.61155700683594, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -79.34754180908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4111557006835938, + "rewards_train/margins": 1.6485984325408936, + "rewards_train/rejected": -3.0597541332244873, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -16.54237174987793, + "logps_train/ref_chosen": -1.9375, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -15.133151054382324, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4604872465133667, + "rewards_train/margins": -0.3846721649169922, + "rewards_train/rejected": -1.0758150815963745, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -7.684534549713135, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -35.1064338684082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14970345795154572, + "rewards_train/margins": 2.5640650242567062, + "rewards_train/rejected": -2.713768482208252, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -20.557933807373047, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -3.46875, + "logps_train/rejected": -21.137344360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3745434284210205, + "rewards_train/margins": 0.39231598377227783, + "rewards_train/rejected": -1.7668594121932983, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -22.953845977783203, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -1.09375, + "logps_train/rejected": -14.921785354614258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48288461565971375, + "rewards_train/margins": 0.89991894364357, + "rewards_train/rejected": -1.3828035593032837, + "step": 2126 + }, + { + "epoch": 0.59, + "logps_train/chosen": -190.0218963623047, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -160.54185485839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0021896362304688, + "rewards_train/margins": 1.8019959926605225, + "rewards_train/rejected": -3.804185628890991, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -8.965575218200684, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -9.025022506713867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0840575248003006, + "rewards_train/margins": 0.005944728851318359, + "rewards_train/rejected": -0.09000225365161896, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -18.641048431396484, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -2.34375, + "logps_train/rejected": -21.91252899169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2891048192977905, + "rewards_train/margins": 0.6677731275558472, + "rewards_train/rejected": -1.9568779468536377, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -16.616336822509766, + "logps_train/ref_chosen": -2.953125, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -35.55377960205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3663212060928345, + "rewards_train/margins": 1.1390568017959595, + "rewards_train/rejected": -2.505378007888794, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -33.86566162109375, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -29.875, + "logps_train/rejected": -56.64678192138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.236566185951233, + "rewards_train/margins": 1.4406119585037231, + "rewards_train/rejected": -2.677178144454956, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -124.23320007324219, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -163.5855712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9733200073242188, + "rewards_train/margins": 1.9852371215820312, + "rewards_train/rejected": -4.95855712890625, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -46.716552734375, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -47.393218994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5654053688049316, + "rewards_train/margins": 0.16766667366027832, + "rewards_train/rejected": -3.73307204246521, + "step": 2127 + }, + { + "epoch": 0.59, + "logps_train/chosen": -24.203563690185547, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -19.56178092956543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8203563690185547, + "rewards_train/margins": 0.042071759700775146, + "rewards_train/rejected": -0.8624281287193298, + "step": 2127 + }, + { + "epoch": 0.59, + "learning_rate": 1.1864446339431244e-07, + "loss": 0.4123, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -99.91744995117188, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -148.01785278320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2417449951171875, + "rewards_train/margins": 3.6600403785705566, + "rewards_train/rejected": -4.901785373687744, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -3.603079319000244, + "logps_train/ref_chosen": -3.984375, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -34.7653923034668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038129568099975586, + "rewards_train/margins": 2.4209189414978027, + "rewards_train/rejected": -2.382789373397827, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -38.591278076171875, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -40.3738899230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6091278195381165, + "rewards_train/margins": 2.028261125087738, + "rewards_train/rejected": -2.6373889446258545, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -46.670257568359375, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -72.97994995117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.210775852203369, + "rewards_train/margins": 1.9372191429138184, + "rewards_train/rejected": -5.1479949951171875, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -9.704262733459473, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -50.80213928222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14832372963428497, + "rewards_train/margins": 2.7410378009080887, + "rewards_train/rejected": -2.5927140712738037, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -30.324935913085938, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -40.10614013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9074935913085938, + "rewards_train/margins": 0.928120493888855, + "rewards_train/rejected": -1.8356140851974487, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -17.59235954284668, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -2.375, + "logps_train/rejected": -2.872917652130127, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.827985942363739, + "rewards_train/margins": -0.7781941778957844, + "rewards_train/rejected": -0.049791764467954636, + "step": 2128 + }, + { + "epoch": 0.59, + "logps_train/chosen": -146.87619018554688, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -146.77870178222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0876190662384033, + "rewards_train/margins": -0.00974893569946289, + "rewards_train/rejected": -2.0778701305389404, + "step": 2128 + }, + { + "epoch": 0.6, + "logps_train/chosen": -115.19154357910156, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -101.82074737548828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21915435791015625, + "rewards_train/margins": -0.08707961440086365, + "rewards_train/rejected": -0.1320747435092926, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -5.089016437530518, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -0.65625, + "logps_train/rejected": -2.065648078918457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18702664971351624, + "rewards_train/margins": -0.046086832880973816, + "rewards_train/rejected": -0.14093981683254242, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -160.0048828125, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -221.7560272216797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.950488567352295, + "rewards_train/margins": 2.5251145362854004, + "rewards_train/rejected": -8.475603103637695, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -11.773160934448242, + "logps_train/ref_chosen": -2.65625, + "logps_train/ref_rejected": -2.359375, + "logps_train/rejected": -17.389623641967773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9116911292076111, + "rewards_train/margins": 0.59133380651474, + "rewards_train/rejected": -1.503024935722351, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -39.233219146728516, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -43.46495819091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0483219623565674, + "rewards_train/margins": 0.9106738567352295, + "rewards_train/rejected": -1.9589958190917969, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -61.351253509521484, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -33.0, + "logps_train/rejected": -67.50718688964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6351253390312195, + "rewards_train/margins": 2.8155933022499084, + "rewards_train/rejected": -3.450718641281128, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -110.38212585449219, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -229.59945678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.838212728500366, + "rewards_train/margins": 10.821732759475708, + "rewards_train/rejected": -13.659945487976074, + "step": 2129 + }, + { + "epoch": 0.6, + "logps_train/chosen": -9.639405250549316, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -16.94617462158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08894052356481552, + "rewards_train/margins": 0.3681769445538521, + "rewards_train/rejected": -0.4571174681186676, + "step": 2129 + }, + { + "epoch": 0.6, + "learning_rate": 1.1739764866186308e-07, + "loss": 0.3445, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -180.55352783203125, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -248.58372497558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.255352735519409, + "rewards_train/margins": 2.8030197620391846, + "rewards_train/rejected": -5.058372497558594, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -65.97042083740234, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -47.61228942871094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.172042369842529, + "rewards_train/margins": -0.3920633792877197, + "rewards_train/rejected": -3.7799789905548096, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -56.14216613769531, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -41.25, + "logps_train/rejected": -78.33160400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3142166137695312, + "rewards_train/margins": 2.3939437866210938, + "rewards_train/rejected": -3.708160400390625, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -4.33160400390625, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -25.645774841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12534789741039276, + "rewards_train/margins": 0.5017296224832535, + "rewards_train/rejected": -0.6270775198936462, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -32.158287048339844, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -8.746688842773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6720787286758423, + "rewards_train/margins": -1.356784850358963, + "rewards_train/rejected": -0.3152938783168793, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.32883071899414, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -26.711917877197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09538307040929794, + "rewards_train/margins": 1.5695587173104286, + "rewards_train/rejected": -1.6649417877197266, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -10.204261779785156, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -67.96558380126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16417618095874786, + "rewards_train/margins": 5.157382294535637, + "rewards_train/rejected": -5.321558475494385, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -29.182998657226562, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -2.3125, + "logps_train/rejected": -16.10635757446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0557998418807983, + "rewards_train/margins": 0.32358598709106445, + "rewards_train/rejected": -1.3793858289718628, + "step": 2130 + }, + { + "epoch": 0.6, + "logps_train/chosen": -36.92744445800781, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -29.53929901123047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.958369493484497, + "rewards_train/margins": -0.6294395923614502, + "rewards_train/rejected": -2.328929901123047, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -4.288908004760742, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -6.889060974121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04298419877886772, + "rewards_train/margins": 0.05689029674977064, + "rewards_train/rejected": -0.01390609797090292, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -45.8599853515625, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -26.0, + "logps_train/rejected": -53.67466735839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.973498582839966, + "rewards_train/margins": -0.20603179931640625, + "rewards_train/rejected": -2.7674667835235596, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -193.8340301513672, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -184.2760009765625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.58340311050415, + "rewards_train/margins": -2.1558027267456055, + "rewards_train/rejected": -4.427600383758545, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -45.73522186279297, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -48.345237731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2735222578048706, + "rewards_train/margins": 2.6860016584396362, + "rewards_train/rejected": -3.959523916244507, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -7.799002170562744, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -29.703718185424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.004900217056274414, + "rewards_train/margins": 1.8529716730117798, + "rewards_train/rejected": -1.8578718900680542, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -196.89002990722656, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -219.2279052734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.639002799987793, + "rewards_train/margins": -0.5162124633789062, + "rewards_train/rejected": -9.122790336608887, + "step": 2131 + }, + { + "epoch": 0.6, + "logps_train/chosen": -75.09544372558594, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -52.26411056518555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.234544515609741, + "rewards_train/margins": 0.45436668395996094, + "rewards_train/rejected": -2.688911199569702, + "step": 2131 + }, + { + "epoch": 0.6, + "learning_rate": 1.1615701121010213e-07, + "loss": 0.6455, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -74.86089324951172, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -261.2580261230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4360893964767456, + "rewards_train/margins": 7.789713025093079, + "rewards_train/rejected": -9.225802421569824, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -14.871668815612793, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -41.358299255371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9809169173240662, + "rewards_train/margins": 1.2549131512641907, + "rewards_train/rejected": -2.235830068588257, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -9.008729934692383, + "logps_train/ref_chosen": -2.359375, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -17.14309310913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6649355292320251, + "rewards_train/margins": 0.7071862816810608, + "rewards_train/rejected": -1.372121810913086, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.34205436706543, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -33.46806335449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0404554605484009, + "rewards_train/margins": 1.025100827217102, + "rewards_train/rejected": -2.065556287765503, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -162.36871337890625, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -188.74472045898438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.186871528625488, + "rewards_train/margins": -0.31239938735961914, + "rewards_train/rejected": -5.874472141265869, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -21.675582885742188, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -43.85500717163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8675583004951477, + "rewards_train/margins": 2.8960675597190857, + "rewards_train/rejected": -3.7636258602142334, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -228.6584014892578, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -226.50277709960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.665840148925781, + "rewards_train/margins": -1.3155622482299805, + "rewards_train/rejected": -6.350277900695801, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -282.4085693359375, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -253.39402770996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.140856742858887, + "rewards_train/margins": 1.098546028137207, + "rewards_train/rejected": -11.239402770996094, + "step": 2132 + }, + { + "epoch": 0.6, + "logps_train/chosen": -126.4588394165039, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -243.17236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.295884132385254, + "rewards_train/margins": 7.421352386474609, + "rewards_train/rejected": -11.717236518859863, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -242.89385986328125, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -240.5699462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.889386177062988, + "rewards_train/margins": 1.0676088333129883, + "rewards_train/rejected": -8.956995010375977, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -16.219072341918945, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -23.5, + "logps_train/rejected": -48.034706115722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0437822341918945, + "rewards_train/margins": 1.4096884727478027, + "rewards_train/rejected": -2.4534707069396973, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -171.0323944091797, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -170.01882934570312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6032395362854004, + "rewards_train/margins": -0.10135650634765625, + "rewards_train/rejected": -2.501883029937744, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -122.4424819946289, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -195.64083862304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7942482233047485, + "rewards_train/margins": 5.069835543632507, + "rewards_train/rejected": -5.864083766937256, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -3.885021448135376, + "logps_train/ref_chosen": -3.46875, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -8.626212120056152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04162714630365372, + "rewards_train/margins": 0.3459940776228905, + "rewards_train/rejected": -0.3876212239265442, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -99.75950622558594, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -71.74784851074219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2259505987167358, + "rewards_train/margins": -0.8511657416820526, + "rewards_train/rejected": -0.3747848570346832, + "step": 2133 + }, + { + "epoch": 0.6, + "logps_train/chosen": -34.98444366455078, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -184.76683044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.260944366455078, + "rewards_train/margins": 1.415738821029663, + "rewards_train/rejected": -3.676683187484741, + "step": 2133 + }, + { + "epoch": 0.6, + "learning_rate": 1.1492255972217613e-07, + "loss": 0.4342, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -120.90901184082031, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -123.15632629394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7909011840820312, + "rewards_train/margins": 0.4247314929962158, + "rewards_train/rejected": -3.215632677078247, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -10.114799499511719, + "logps_train/ref_chosen": -1.671875, + "logps_train/ref_rejected": -1.1640625, + "logps_train/rejected": -5.4880170822143555, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8442924618721008, + "rewards_train/margins": -0.4118970036506653, + "rewards_train/rejected": -0.43239545822143555, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -14.2965669631958, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -27.68997573852539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.970281720161438, + "rewards_train/margins": 0.5862158536911011, + "rewards_train/rejected": -1.556497573852539, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -127.7696533203125, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -178.0174560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.07696533203125, + "rewards_train/margins": 5.174780368804932, + "rewards_train/rejected": -6.251745700836182, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -1.148423194885254, + "logps_train/ref_chosen": -1.1953125, + "logps_train/ref_rejected": -1.375, + "logps_train/rejected": -1.2275679111480713, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.004688930697739124, + "rewards_train/margins": -0.010054278187453747, + "rewards_train/rejected": 0.014743208885192871, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -45.83100128173828, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -44.73900604248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.626850128173828, + "rewards_train/margins": 0.1783006191253662, + "rewards_train/rejected": -3.8051507472991943, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -113.54090118408203, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -113.13981628417969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7040901184082031, + "rewards_train/margins": -0.04010850191116333, + "rewards_train/rejected": -0.6639816164970398, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -38.49957275390625, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -50.97710418701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.25004273653030396, + "rewards_train/margins": 1.3477532267570496, + "rewards_train/rejected": -1.0977104902267456, + "step": 2134 + }, + { + "epoch": 0.6, + "logps_train/chosen": -55.669654846191406, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -42.640174865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2169654816389084, + "rewards_train/margins": 3.4595520049333572, + "rewards_train/rejected": -3.6765174865722656, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -32.888450622558594, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -65.14688110351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5513451099395752, + "rewards_train/margins": 3.3820931911468506, + "rewards_train/rejected": -4.933438301086426, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -206.14239501953125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -205.10745239257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.714239597320557, + "rewards_train/margins": 0.8965058326721191, + "rewards_train/rejected": -7.610745429992676, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -23.710432052612305, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -35.77649688720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8991682529449463, + "rewards_train/margins": 1.2284815311431885, + "rewards_train/rejected": -3.1276497840881348, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -12.558890342712402, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -30.131221771240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8308890461921692, + "rewards_train/margins": 1.3197330832481384, + "rewards_train/rejected": -2.1506221294403076, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -189.0443115234375, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -163.86721801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7044312953948975, + "rewards_train/margins": 2.7822906970977783, + "rewards_train/rejected": -6.486721992492676, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -8.67424201965332, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -20.312150955200195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18257580697536469, + "rewards_train/margins": 1.3887909501791, + "rewards_train/rejected": -1.2062151432037354, + "step": 2135 + }, + { + "epoch": 0.6, + "logps_train/chosen": -50.7902946472168, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -112.06615447998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3790294826030731, + "rewards_train/margins": 1.0275860130786896, + "rewards_train/rejected": -1.4066154956817627, + "step": 2135 + }, + { + "epoch": 0.6, + "learning_rate": 1.1369430283793691e-07, + "loss": 0.3507, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -42.943359375, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -44.42334747314453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4818360805511475, + "rewards_train/margins": -0.13950133323669434, + "rewards_train/rejected": -2.342334747314453, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -0.2777017056941986, + "logps_train/ref_chosen": -0.345703125, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -1.7488352060317993, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006800142116844654, + "rewards_train/margins": -0.033941338770091534, + "rewards_train/rejected": 0.04074148088693619, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -206.9661102294922, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -240.1912841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5966110229492188, + "rewards_train/margins": 8.322517395019531, + "rewards_train/rejected": -10.91912841796875, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -11.674254417419434, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -10.478153228759766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6799254417419434, + "rewards_train/margins": -0.21961012482643127, + "rewards_train/rejected": -0.4603153169155121, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -79.6011962890625, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -73.41546630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010119629092514515, + "rewards_train/margins": 1.2814270732924342, + "rewards_train/rejected": -1.2915467023849487, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -56.342681884765625, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -48.435279846191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1342681646347046, + "rewards_train/margins": 1.446759819984436, + "rewards_train/rejected": -2.5810279846191406, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -175.30133056640625, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -182.69113159179688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.880133152008057, + "rewards_train/margins": -1.1610198020935059, + "rewards_train/rejected": -6.719113349914551, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -147.57101440429688, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -201.5790557861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7571014761924744, + "rewards_train/margins": 4.000804007053375, + "rewards_train/rejected": -4.75790548324585, + "step": 2136 + }, + { + "epoch": 0.6, + "logps_train/chosen": -140.49554443359375, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -228.78265380859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.649554431438446, + "rewards_train/margins": 6.328710854053497, + "rewards_train/rejected": -6.978265285491943, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -48.290069580078125, + "logps_train/ref_chosen": -29.875, + "logps_train/ref_rejected": -52.0, + "logps_train/rejected": -79.93614959716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8415069580078125, + "rewards_train/margins": 0.9521081447601318, + "rewards_train/rejected": -2.7936151027679443, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.568117141723633, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -18.717288970947266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7177492380142212, + "rewards_train/margins": -1.4085203409194946, + "rewards_train/rejected": -0.30922889709472656, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -98.77497863769531, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -152.09671020507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4274978637695312, + "rewards_train/margins": 2.182173252105713, + "rewards_train/rejected": -3.609671115875244, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -12.967568397521973, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -26.25, + "logps_train/rejected": -40.84368133544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.102225661277771, + "rewards_train/margins": 0.35714244842529297, + "rewards_train/rejected": -1.459368109703064, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -387.31988525390625, + "logps_train/ref_chosen": -302.0, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -46.507415771484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.531989097595215, + "rewards_train/margins": -6.706247448921204, + "rewards_train/rejected": -1.8257416486740112, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -33.065826416015625, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -50.07389831542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6065826416015625, + "rewards_train/margins": 0.37580716609954834, + "rewards_train/rejected": -1.9823898077011108, + "step": 2137 + }, + { + "epoch": 0.6, + "logps_train/chosen": -114.44731903076172, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -222.34495544433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.644731879234314, + "rewards_train/margins": 8.33976423740387, + "rewards_train/rejected": -9.984496116638184, + "step": 2137 + }, + { + "epoch": 0.6, + "learning_rate": 1.124722491538801e-07, + "loss": 0.8744, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -102.07861328125, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -230.96652221679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.232861518859863, + "rewards_train/margins": 8.01379108428955, + "rewards_train/rejected": -12.246652603149414, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -60.92157745361328, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -156.56246948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7796578407287598, + "rewards_train/margins": 5.2765889167785645, + "rewards_train/rejected": -9.056246757507324, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -95.27854919433594, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -162.62535095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3278549313545227, + "rewards_train/margins": 0.8346801400184631, + "rewards_train/rejected": -1.1625350713729858, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -0.19676247239112854, + "logps_train/ref_chosen": -0.236328125, + "logps_train/ref_rejected": -0.236328125, + "logps_train/rejected": -0.19679982960224152, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0039565651677548885, + "rewards_train/margins": 3.7355348467826843e-06, + "rewards_train/rejected": 0.003952829632908106, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -90.55546569824219, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -46.320369720458984, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.730546712875366, + "rewards_train/margins": -0.004759788513183594, + "rewards_train/rejected": -3.7257869243621826, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -49.523250579833984, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -14.565529823303223, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4023250341415405, + "rewards_train/margins": -0.22389698028564453, + "rewards_train/rejected": -1.178428053855896, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -137.63682556152344, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -230.19613647460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.363682508468628, + "rewards_train/margins": 6.055931329727173, + "rewards_train/rejected": -8.4196138381958, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -7.005403518676758, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -19.778564453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2724153697490692, + "rewards_train/margins": -0.007058918476104736, + "rewards_train/rejected": -0.2653564512729645, + "step": 2138 + }, + { + "epoch": 0.6, + "logps_train/chosen": -88.63011932373047, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -111.4830322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.263011932373047, + "rewards_train/margins": 0.8352913856506348, + "rewards_train/rejected": -3.0983033180236816, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -16.79555320739746, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -26.247819900512695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3904927968978882, + "rewards_train/margins": 0.19053924083709717, + "rewards_train/rejected": -1.5810320377349854, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -81.06050872802734, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -64.8580093383789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8560508489608765, + "rewards_train/margins": 1.25475013256073, + "rewards_train/rejected": -3.1108009815216064, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -143.19895935058594, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -151.55984497070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.919896125793457, + "rewards_train/margins": -1.2639117240905762, + "rewards_train/rejected": -5.655984401702881, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -24.868419647216797, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -39.45224380493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7399669885635376, + "rewards_train/margins": 1.978695034980774, + "rewards_train/rejected": -3.7186620235443115, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -125.50859832763672, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -149.90985107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1008598804473877, + "rewards_train/margins": 5.640125513076782, + "rewards_train/rejected": -6.74098539352417, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -88.53287506103516, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -123.447021484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4532875120639801, + "rewards_train/margins": 3.541414588689804, + "rewards_train/rejected": -3.994702100753784, + "step": 2139 + }, + { + "epoch": 0.6, + "logps_train/chosen": -2.7926814556121826, + "logps_train/ref_chosen": -3.078125, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -12.921656608581543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028544355183839798, + "rewards_train/margins": 0.21446001902222633, + "rewards_train/rejected": -0.18591566383838654, + "step": 2139 + }, + { + "epoch": 0.6, + "learning_rate": 1.1125640722308626e-07, + "loss": 0.4214, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -21.53626823425293, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -6.53125, + "logps_train/rejected": -34.36983108520508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.40362682938575745, + "rewards_train/margins": 2.3802312314510345, + "rewards_train/rejected": -2.783858060836792, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.59605598449707, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -40.852195739746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.609605610370636, + "rewards_train/margins": 2.2631139159202576, + "rewards_train/rejected": -2.8727195262908936, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -165.2220458984375, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -210.5386962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.172204494476318, + "rewards_train/margins": 2.8816657066345215, + "rewards_train/rejected": -8.05387020111084, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -6.464818477630615, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -1.2421875, + "logps_train/rejected": -6.447750091552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.407419353723526, + "rewards_train/margins": 0.11313691735267639, + "rewards_train/rejected": -0.5205562710762024, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -27.136066436767578, + "logps_train/ref_chosen": -1.3359375, + "logps_train/ref_rejected": -3.375, + "logps_train/rejected": -33.415382385253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5800130367279053, + "rewards_train/margins": 0.424025297164917, + "rewards_train/rejected": -3.0040383338928223, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -67.42424011230469, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -79.70951843261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.417423963546753, + "rewards_train/margins": 2.7035281658172607, + "rewards_train/rejected": -5.120952129364014, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -87.05266571044922, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -194.0426025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8552665710449219, + "rewards_train/margins": 6.84899377822876, + "rewards_train/rejected": -7.704260349273682, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -162.5474090576172, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -162.945068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9547408819198608, + "rewards_train/margins": 0.03976595401763916, + "rewards_train/rejected": -1.9945068359375, + "step": 2140 + }, + { + "epoch": 0.6, + "logps_train/chosen": -116.52690124511719, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -63.78229522705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4526901245117188, + "rewards_train/margins": 0.8255395889282227, + "rewards_train/rejected": -4.278229713439941, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -33.03663635253906, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -42.47014617919922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.478663682937622, + "rewards_train/margins": -1.6316490769386292, + "rewards_train/rejected": -0.8470146059989929, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.273563385009766, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -60.302772521972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43360635638237, + "rewards_train/margins": 3.8591711819171906, + "rewards_train/rejected": -4.2927775382995605, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -96.65072631835938, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -120.65618896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2150726318359375, + "rewards_train/margins": 4.350546360015869, + "rewards_train/rejected": -4.565618991851807, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.15463638305664, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -30.848876953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4310886859893799, + "rewards_train/margins": 1.1881740093231201, + "rewards_train/rejected": -2.6192626953125, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -70.30029296875, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -35.25, + "logps_train/rejected": -75.59150695800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.705029249191284, + "rewards_train/margins": 1.3291213512420654, + "rewards_train/rejected": -4.03415060043335, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -107.72315979003906, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -31.41204833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4223159849643707, + "rewards_train/margins": 2.05951389670372, + "rewards_train/rejected": -2.481829881668091, + "step": 2141 + }, + { + "epoch": 0.6, + "logps_train/chosen": -117.75579833984375, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -147.15142822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.525579810142517, + "rewards_train/margins": 2.1895631551742554, + "rewards_train/rejected": -3.7151429653167725, + "step": 2141 + }, + { + "epoch": 0.6, + "learning_rate": 1.1004678555515956e-07, + "loss": 0.316, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -190.72869873046875, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -239.9686279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.972869873046875, + "rewards_train/margins": 4.523993492126465, + "rewards_train/rejected": -9.49686336517334, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -17.126691818237305, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -51.65211868286133, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5251691937446594, + "rewards_train/margins": 3.4275426268577576, + "rewards_train/rejected": -3.952711820602417, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -259.91162109375, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -364.0, + "logps_train/rejected": -484.5655822753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.791162490844727, + "rewards_train/margins": 3.2653961181640625, + "rewards_train/rejected": -12.056558609008789, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -50.31946563720703, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -67.58784484863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1319465637207031, + "rewards_train/margins": 1.2018380165100098, + "rewards_train/rejected": -2.333784580230713, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -175.20712280273438, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -156.6038360595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.520712375640869, + "rewards_train/margins": 1.3896713256835938, + "rewards_train/rejected": -5.910383701324463, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -1.134894609451294, + "logps_train/ref_chosen": -0.4140625, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -6.78861665725708, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07208321243524551, + "rewards_train/margins": 0.2520909532904625, + "rewards_train/rejected": -0.324174165725708, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -85.90705871582031, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -171.93637084960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009294128976762295, + "rewards_train/margins": 6.902931309305131, + "rewards_train/rejected": -6.893637180328369, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -311.803466796875, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -295.1244201660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -14.280346870422363, + "rewards_train/margins": -0.06790447235107422, + "rewards_train/rejected": -14.212442398071289, + "step": 2142 + }, + { + "epoch": 0.6, + "logps_train/chosen": -15.990840911865234, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -18.023269653320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8178340792655945, + "rewards_train/margins": -0.41550710797309875, + "rewards_train/rejected": -0.4023269712924957, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -115.45442199707031, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -198.47633361816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7954423427581787, + "rewards_train/margins": 5.3021910190582275, + "rewards_train/rejected": -8.097633361816406, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -117.15009307861328, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -181.92132568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1650092601776123, + "rewards_train/margins": 5.227123498916626, + "rewards_train/rejected": -8.392132759094238, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -147.2646484375, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -141.53369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.62646484375, + "rewards_train/margins": 3.826904296875, + "rewards_train/rejected": -6.453369140625, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -41.16764450073242, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -22.193248748779297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2042644023895264, + "rewards_train/margins": -0.40681445598602295, + "rewards_train/rejected": -1.7974499464035034, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -12.097296714782715, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -16.64957618713379, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7347297072410583, + "rewards_train/margins": 0.04272794723510742, + "rewards_train/rejected": -0.7774576544761658, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -25.7852783203125, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -54.049217224121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.30352783203125, + "rewards_train/margins": 2.676393985748291, + "rewards_train/rejected": -3.979921817779541, + "step": 2143 + }, + { + "epoch": 0.6, + "logps_train/chosen": -10.457809448242188, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -30.203372955322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03953094407916069, + "rewards_train/margins": 0.40580635145306587, + "rewards_train/rejected": -0.44533729553222656, + "step": 2143 + }, + { + "epoch": 0.6, + "learning_rate": 1.0884339261616915e-07, + "loss": 0.312, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -124.723876953125, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -254.88204956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5223877429962158, + "rewards_train/margins": 8.765817403793335, + "rewards_train/rejected": -10.28820514678955, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -48.36940002441406, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -82.24354553222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7994401454925537, + "rewards_train/margins": 2.924914598464966, + "rewards_train/rejected": -5.7243547439575195, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -30.573822021484375, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -41.52846908569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5636322498321533, + "rewards_train/margins": 1.1267147064208984, + "rewards_train/rejected": -2.6903469562530518, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -31.185985565185547, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -39.80610275268555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6592235565185547, + "rewards_train/margins": 0.5807616710662842, + "rewards_train/rejected": -3.239985227584839, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -17.40106964111328, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -61.544063568115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8276069760322571, + "rewards_train/margins": 1.0017994046211243, + "rewards_train/rejected": -1.8294063806533813, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -87.67875671386719, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -140.1085205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9178756475448608, + "rewards_train/margins": 2.642976403236389, + "rewards_train/rejected": -4.56085205078125, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -163.06832885742188, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -166.79901123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.306833028793335, + "rewards_train/margins": 2.2730681896209717, + "rewards_train/rejected": -4.579901218414307, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.68105697631836, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -19.525772094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4306057095527649, + "rewards_train/margins": 0.7157215476036072, + "rewards_train/rejected": -1.146327257156372, + "step": 2144 + }, + { + "epoch": 0.6, + "logps_train/chosen": -126.48277282714844, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -189.17047119140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0982773303985596, + "rewards_train/margins": 5.768769979476929, + "rewards_train/rejected": -7.867047309875488, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -116.78591918945312, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -101.1551742553711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9785919189453125, + "rewards_train/margins": -1.0630743503570557, + "rewards_train/rejected": -2.915517568588257, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -38.24795913696289, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -35.26367950439453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2997959852218628, + "rewards_train/margins": -0.6609280109405518, + "rewards_train/rejected": -0.638867974281311, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -66.69403076171875, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -118.98318481445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.844403088092804, + "rewards_train/margins": 3.00391548871994, + "rewards_train/rejected": -3.848318576812744, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -6.7202324867248535, + "logps_train/ref_chosen": -2.34375, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -41.69393539428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4376482665538788, + "rewards_train/margins": 3.037995368242264, + "rewards_train/rejected": -3.4756436347961426, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -43.00775909423828, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -37.31779861450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5757759213447571, + "rewards_train/margins": 1.3560039401054382, + "rewards_train/rejected": -1.9317798614501953, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -14.13438606262207, + "logps_train/ref_chosen": -1.5625, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -20.35791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2571886777877808, + "rewards_train/margins": 0.5754773616790771, + "rewards_train/rejected": -1.832666039466858, + "step": 2145 + }, + { + "epoch": 0.6, + "logps_train/chosen": -59.240020751953125, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -90.18193054199219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.549002170562744, + "rewards_train/margins": 2.2441911697387695, + "rewards_train/rejected": -4.793193340301514, + "step": 2145 + }, + { + "epoch": 0.6, + "learning_rate": 1.0764623682858964e-07, + "loss": 0.3102, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -14.621089935302734, + "logps_train/ref_chosen": -4.0, + "logps_train/ref_rejected": -3.578125, + "logps_train/rejected": -31.842205047607422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0621089935302734, + "rewards_train/margins": 1.7642991542816162, + "rewards_train/rejected": -2.8264081478118896, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -32.049415588378906, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -24.428770065307617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6549415588378906, + "rewards_train/margins": -0.6526894569396973, + "rewards_train/rejected": -2.0022521018981934, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -66.62457275390625, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -66.76124572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16245727241039276, + "rewards_train/margins": 0.013667300343513489, + "rewards_train/rejected": -0.17612457275390625, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.748291015625, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -24.55740737915039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.868579089641571, + "rewards_train/margins": -0.650338351726532, + "rewards_train/rejected": -0.21824073791503906, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -12.781367301940918, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -43.10273361206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0906367301940918, + "rewards_train/margins": 3.7665116786956787, + "rewards_train/rejected": -3.8571484088897705, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -153.30323791503906, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -282.1172180175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.130323886871338, + "rewards_train/margins": 5.881397724151611, + "rewards_train/rejected": -9.01172161102295, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -25.212297439575195, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -45.628360748291016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9149798154830933, + "rewards_train/margins": 0.9478563070297241, + "rewards_train/rejected": -2.8628361225128174, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -34.720252990722656, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -233.20111083984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3720253705978394, + "rewards_train/margins": 4.248085618019104, + "rewards_train/rejected": -5.620110988616943, + "step": 2146 + }, + { + "epoch": 0.6, + "logps_train/chosen": -257.3055114746094, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -254.49710083007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.030550956726074, + "rewards_train/margins": 0.5191593170166016, + "rewards_train/rejected": -9.549710273742676, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -24.57675552368164, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -55.5, + "logps_train/rejected": -93.36576080322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2514256238937378, + "rewards_train/margins": 2.535150408744812, + "rewards_train/rejected": -3.78657603263855, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -141.09579467773438, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -214.6353759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.309579372406006, + "rewards_train/margins": 6.253958225250244, + "rewards_train/rejected": -10.56353759765625, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -59.75334548950195, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -25.0, + "logps_train/rejected": -84.05132293701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4503345489501953, + "rewards_train/margins": 3.4547977447509766, + "rewards_train/rejected": -5.905132293701172, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -62.750587463378906, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -22.084922790527344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.137558698654175, + "rewards_train/margins": -1.1446913480758667, + "rewards_train/rejected": -1.992867350578308, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -179.5458984375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -194.0, + "logps_train/rejected": -294.1055603027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.504590034484863, + "rewards_train/margins": 3.5059661865234375, + "rewards_train/rejected": -10.0105562210083, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.284196853637695, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -40.85165023803711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9221696853637695, + "rewards_train/margins": 2.0567452907562256, + "rewards_train/rejected": -2.978914976119995, + "step": 2147 + }, + { + "epoch": 0.6, + "logps_train/chosen": -158.20928955078125, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -219.4525146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.420928955078125, + "rewards_train/margins": 2.4243226051330566, + "rewards_train/rejected": -5.845251560211182, + "step": 2147 + }, + { + "epoch": 0.6, + "learning_rate": 1.0645532657124224e-07, + "loss": 0.349, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -0.13520923256874084, + "logps_train/ref_chosen": -0.60546875, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -8.497783660888672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.047025952488183975, + "rewards_train/margins": 0.15305432304739952, + "rewards_train/rejected": -0.10602837055921555, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -13.97594928741455, + "logps_train/ref_chosen": -2.625, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -40.16316223144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1350950002670288, + "rewards_train/margins": 1.6312211751937866, + "rewards_train/rejected": -2.7663161754608154, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -137.598388671875, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -186.21591186523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4598388671875, + "rewards_train/margins": 3.5117526054382324, + "rewards_train/rejected": -7.971591472625732, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -124.57324981689453, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -181.02706909179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8073251247406006, + "rewards_train/margins": 4.4453818798065186, + "rewards_train/rejected": -7.252707004547119, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -93.68444061279297, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -171.5635986328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1684441566467285, + "rewards_train/margins": 3.8379158973693848, + "rewards_train/rejected": -6.006360054016113, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -21.561382293701172, + "logps_train/ref_chosen": -4.65625, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -45.17144775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.690513253211975, + "rewards_train/margins": 1.0516315698623657, + "rewards_train/rejected": -2.742144823074341, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -18.844837188720703, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -87.40676879882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.362608790397644, + "rewards_train/margins": 3.478067994117737, + "rewards_train/rejected": -4.840676784515381, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -152.548828125, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -146.3067169189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.904882907867432, + "rewards_train/margins": 0.12578868865966797, + "rewards_train/rejected": -5.0306715965271, + "step": 2148 + }, + { + "epoch": 0.6, + "logps_train/chosen": -122.99593353271484, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -132.9253387451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8995933532714844, + "rewards_train/margins": 1.5929408073425293, + "rewards_train/rejected": -4.492534160614014, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -217.38485717773438, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -134.47161865234375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.638485908508301, + "rewards_train/margins": -1.3413240909576416, + "rewards_train/rejected": -3.297161817550659, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -128.68588256835938, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -211.55194091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7185882329940796, + "rewards_train/margins": 6.036605954170227, + "rewards_train/rejected": -7.755194187164307, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -6.208824157714844, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -25.63929557800293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33963242173194885, + "rewards_train/margins": 1.8180471360683441, + "rewards_train/rejected": -2.157679557800293, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -7.488543510437012, + "logps_train/ref_chosen": -1.734375, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -19.55047607421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5754168629646301, + "rewards_train/margins": 0.5358807444572449, + "rewards_train/rejected": -1.111297607421875, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -172.7281494140625, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -233.62696838378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.872815132141113, + "rewards_train/margins": 4.589881896972656, + "rewards_train/rejected": -9.46269702911377, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -10.252944946289062, + "logps_train/ref_chosen": -0.7265625, + "logps_train/ref_rejected": -0.7265625, + "logps_train/rejected": -10.471479415893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9526382684707642, + "rewards_train/margins": 0.02185344696044922, + "rewards_train/rejected": -0.9744917154312134, + "step": 2149 + }, + { + "epoch": 0.6, + "logps_train/chosen": -142.07237243652344, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -215.82229614257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.257237195968628, + "rewards_train/margins": 5.324992418289185, + "rewards_train/rejected": -8.582229614257812, + "step": 2149 + }, + { + "epoch": 0.6, + "learning_rate": 1.0527067017923652e-07, + "loss": 0.3058, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -17.213502883911133, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -32.20816421508789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.460412859916687, + "rewards_train/margins": 0.02290356159210205, + "rewards_train/rejected": -1.483316421508789, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -133.35922241210938, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -60.0, + "logps_train/rejected": -97.80255889892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.435922384262085, + "rewards_train/margins": 1.3443336486816406, + "rewards_train/rejected": -3.7802560329437256, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -87.77906799316406, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -72.83006286621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7279068231582642, + "rewards_train/margins": 4.086349368095398, + "rewards_train/rejected": -5.814256191253662, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -226.23977661132812, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -266.45794677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.723977565765381, + "rewards_train/margins": 8.22181749343872, + "rewards_train/rejected": -12.945795059204102, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -8.5938138961792, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -58.75255584716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4828189015388489, + "rewards_train/margins": 3.9486867785453796, + "rewards_train/rejected": -4.4315056800842285, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -76.25894165039062, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -40.737552642822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6008942127227783, + "rewards_train/margins": 0.32286107540130615, + "rewards_train/rejected": -1.9237552881240845, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -18.408565521240234, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -18.990589141845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9033565521240234, + "rewards_train/margins": 0.20820236206054688, + "rewards_train/rejected": -1.1115589141845703, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -88.78351593017578, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -149.8984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7283515930175781, + "rewards_train/margins": 1.2614922523498535, + "rewards_train/rejected": -2.9898438453674316, + "step": 2150 + }, + { + "epoch": 0.6, + "logps_train/chosen": -104.74266052246094, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -184.94497680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3742661476135254, + "rewards_train/margins": 8.120232105255127, + "rewards_train/rejected": -10.494498252868652, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -120.53474426269531, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -106.23280334472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.203474521636963, + "rewards_train/margins": 0.769805908203125, + "rewards_train/rejected": -3.973280429840088, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -18.656387329101562, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -51.054351806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6218887567520142, + "rewards_train/margins": 2.58354651927948, + "rewards_train/rejected": -3.205435276031494, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -23.885234832763672, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -55.10580825805664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.666648507118225, + "rewards_train/margins": 1.9064322710037231, + "rewards_train/rejected": -3.5730807781219482, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -5.691240310668945, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -11.175132751464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4480302929878235, + "rewards_train/margins": 0.20385801792144775, + "rewards_train/rejected": -0.6518883109092712, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.585243225097656, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -33.80260467529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0147743225097656, + "rewards_train/margins": 1.8998610973358154, + "rewards_train/rejected": -2.914635419845581, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -8.354652404785156, + "logps_train/ref_chosen": -0.77734375, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -13.19133186340332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7577309012413025, + "rewards_train/margins": 0.1551523208618164, + "rewards_train/rejected": -0.9128832221031189, + "step": 2151 + }, + { + "epoch": 0.6, + "logps_train/chosen": -149.20993041992188, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -163.6761474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.320992946624756, + "rewards_train/margins": 1.9466218948364258, + "rewards_train/rejected": -6.267614841461182, + "step": 2151 + }, + { + "epoch": 0.6, + "learning_rate": 1.0409227594391101e-07, + "loss": 0.276, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -9.393757820129395, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -20.010238647460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7190632820129395, + "rewards_train/margins": 0.4007105827331543, + "rewards_train/rejected": -1.1197738647460938, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -243.95433044433594, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -193.41819763183594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.095433235168457, + "rewards_train/margins": -1.0536136627197266, + "rewards_train/rejected": -8.04181957244873, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -248.86685180664062, + "logps_train/ref_chosen": -197.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -263.7490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.186685085296631, + "rewards_train/margins": 4.088217258453369, + "rewards_train/rejected": -9.27490234375, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -124.86707305908203, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -126.11946105957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2367074489593506, + "rewards_train/margins": 0.12523865699768066, + "rewards_train/rejected": -3.3619461059570312, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -145.82408142089844, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -232.52984619140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.182408332824707, + "rewards_train/margins": 4.670576095581055, + "rewards_train/rejected": -8.852984428405762, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -187.29696655273438, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -236.23074340820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.0796966552734375, + "rewards_train/margins": 3.4433774948120117, + "rewards_train/rejected": -10.52307415008545, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -106.72559356689453, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -191.01803588867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9725593328475952, + "rewards_train/margins": 2.229244351387024, + "rewards_train/rejected": -4.201803684234619, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -152.20123291015625, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -105.90119171142578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.520123243331909, + "rewards_train/margins": -0.23000407218933105, + "rewards_train/rejected": -2.290119171142578, + "step": 2152 + }, + { + "epoch": 0.6, + "logps_train/chosen": -154.74423217773438, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -197.34356689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.324423313140869, + "rewards_train/margins": 3.859933376312256, + "rewards_train/rejected": -9.184356689453125, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -79.00656127929688, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -104.05309295654297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.725656270980835, + "rewards_train/margins": -1.3203468322753906, + "rewards_train/rejected": -2.4053094387054443, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -128.45523071289062, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -168.70986938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6455230712890625, + "rewards_train/margins": 5.275464057922363, + "rewards_train/rejected": -7.920987129211426, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -103.6688232421875, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -49.25, + "logps_train/rejected": -65.96631622314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.36688232421875, + "rewards_train/margins": 0.30474936962127686, + "rewards_train/rejected": -1.6716316938400269, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -0.07747837901115417, + "logps_train/ref_chosen": -0.1513671875, + "logps_train/ref_rejected": -0.1513671875, + "logps_train/rejected": -0.06925681233406067, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.00738888094201684, + "rewards_train/margins": -0.0008221571333706379, + "rewards_train/rejected": 0.008211038075387478, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -30.20656967163086, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -56.4513053894043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2331570386886597, + "rewards_train/margins": 2.149473547935486, + "rewards_train/rejected": -3.3826305866241455, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -193.8038330078125, + "logps_train/ref_chosen": -171.0, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -44.8369026184082, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.280383348464966, + "rewards_train/margins": -0.13419294357299805, + "rewards_train/rejected": -2.1461904048919678, + "step": 2153 + }, + { + "epoch": 0.6, + "logps_train/chosen": -92.94017791748047, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -114.65060424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1440179347991943, + "rewards_train/margins": 0.9210424423217773, + "rewards_train/rejected": -3.0650603771209717, + "step": 2153 + }, + { + "epoch": 0.6, + "learning_rate": 1.029201521127756e-07, + "loss": 0.4681, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -127.13581848144531, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -260.1512451171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8635818362236023, + "rewards_train/margins": 5.051542580127716, + "rewards_train/rejected": -5.915124416351318, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -12.051895141601562, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -12.616767883300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6333145499229431, + "rewards_train/margins": 0.276799738407135, + "rewards_train/rejected": -0.9101142883300781, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -22.29395866394043, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -23.967546463012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.385645866394043, + "rewards_train/margins": 0.7236087322235107, + "rewards_train/rejected": -2.1092545986175537, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -11.398321151733398, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -14.599215507507324, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7117071151733398, + "rewards_train/margins": -0.1642855405807495, + "rewards_train/rejected": -0.5474215745925903, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -195.25433349609375, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -263.83013916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.825433731079102, + "rewards_train/margins": 3.95758056640625, + "rewards_train/rejected": -13.783014297485352, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -117.97526550292969, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -122.02318572998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09752655029296875, + "rewards_train/margins": 3.154792070388794, + "rewards_train/rejected": -3.2523186206817627, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -160.39010620117188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -251.41384887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.039010763168335, + "rewards_train/margins": 6.8023741245269775, + "rewards_train/rejected": -9.841384887695312, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.090124130249023, + "logps_train/ref_chosen": -7.3125, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -17.557567596435547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1777623891830444, + "rewards_train/margins": -0.19075560569763184, + "rewards_train/rejected": -0.9870067834854126, + "step": 2154 + }, + { + "epoch": 0.6, + "logps_train/chosen": -164.86746215820312, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -217.93264770507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.18674635887146, + "rewards_train/margins": 4.006518602371216, + "rewards_train/rejected": -6.193264961242676, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -93.49017333984375, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -48.24638748168945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.599017381668091, + "rewards_train/margins": 1.0006213188171387, + "rewards_train/rejected": -3.5996387004852295, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -190.35226440429688, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -186.00497436523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4352264404296875, + "rewards_train/margins": 1.96527099609375, + "rewards_train/rejected": -6.4004974365234375, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.880603790283203, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -40.75, + "logps_train/rejected": -49.922584533691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3161853551864624, + "rewards_train/margins": -0.39892691373825073, + "rewards_train/rejected": -0.9172584414482117, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -146.83665466308594, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -200.16787719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.283665657043457, + "rewards_train/margins": 3.633122444152832, + "rewards_train/rejected": -9.916788101196289, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -23.677627563476562, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -121.71908569335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7927627563476562, + "rewards_train/margins": 5.054145812988281, + "rewards_train/rejected": -5.8469085693359375, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -2.9527950286865234, + "logps_train/ref_chosen": -1.453125, + "logps_train/ref_rejected": -0.9375, + "logps_train/rejected": -1.1206663846969604, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1499669998884201, + "rewards_train/margins": -0.13165036030113697, + "rewards_train/rejected": -0.018316639587283134, + "step": 2155 + }, + { + "epoch": 0.6, + "logps_train/chosen": -188.37351989746094, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -176.9798126220703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.83735179901123, + "rewards_train/margins": -0.18937015533447266, + "rewards_train/rejected": -8.647981643676758, + "step": 2155 + }, + { + "epoch": 0.6, + "learning_rate": 1.0175430688945486e-07, + "loss": 0.3474, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -13.742262840270996, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -10.318443298339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0461013317108154, + "rewards_train/margins": -0.5673820078372955, + "rewards_train/rejected": -0.4787193238735199, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -59.393611907958984, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -62.9560432434082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6893612146377563, + "rewards_train/margins": 1.9687432050704956, + "rewards_train/rejected": -3.658104419708252, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -125.07746124267578, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -132.49899291992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.00774621963501, + "rewards_train/margins": -0.9078469276428223, + "rewards_train/rejected": -4.0998992919921875, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -21.48863410949707, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -3.171875, + "logps_train/rejected": -36.353172302246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3238634169101715, + "rewards_train/margins": 2.9942663609981537, + "rewards_train/rejected": -3.318129777908325, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.669776916503906, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -32.17082214355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1232277154922485, + "rewards_train/margins": 1.7907296419143677, + "rewards_train/rejected": -2.913957357406616, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -22.79279899597168, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -67.37682342529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.766779899597168, + "rewards_train/margins": 3.6834025382995605, + "rewards_train/rejected": -4.4501824378967285, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -13.369621276855469, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -24.52651596069336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46196213364601135, + "rewards_train/margins": 0.5219394862651825, + "rewards_train/rejected": -0.9839016199111938, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -124.15886688232422, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -128.69186401367188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4158867597579956, + "rewards_train/margins": -1.1467003524303436, + "rewards_train/rejected": -0.269186407327652, + "step": 2156 + }, + { + "epoch": 0.6, + "logps_train/chosen": -0.09899090975522995, + "logps_train/ref_chosen": -0.2080078125, + "logps_train/ref_rejected": -0.2080078125, + "logps_train/rejected": -0.10054249316453934, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01090169046074152, + "rewards_train/margins": 0.00015515834093093872, + "rewards_train/rejected": 0.010746532119810581, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -167.72998046875, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -146.06167602539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.572998046875, + "rewards_train/margins": -0.36683034896850586, + "rewards_train/rejected": -3.206167697906494, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -90.88463592529297, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -184.4083251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.088463544845581, + "rewards_train/margins": 4.352369070053101, + "rewards_train/rejected": -6.440832614898682, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -169.20162963867188, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -254.60211181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.220163106918335, + "rewards_train/margins": 8.440048456192017, + "rewards_train/rejected": -11.660211563110352, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -82.41197967529297, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -164.16201782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4411979913711548, + "rewards_train/margins": 3.4250038862228394, + "rewards_train/rejected": -4.866201877593994, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -23.714574813842773, + "logps_train/ref_chosen": -0.703125, + "logps_train/ref_rejected": -0.703125, + "logps_train/rejected": -23.759403228759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.301145076751709, + "rewards_train/margins": 0.004482746124267578, + "rewards_train/rejected": -2.3056278228759766, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -27.3519287109375, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -116.18506622314453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.13519287109375, + "rewards_train/margins": -0.26668626070022583, + "rewards_train/rejected": -0.8685066103935242, + "step": 2157 + }, + { + "epoch": 0.6, + "logps_train/chosen": -54.03453063964844, + "logps_train/ref_chosen": -44.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -121.45758819580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0034531354904175, + "rewards_train/margins": 3.6923056840896606, + "rewards_train/rejected": -4.695758819580078, + "step": 2157 + }, + { + "epoch": 0.6, + "learning_rate": 1.0059474843362892e-07, + "loss": 0.4808, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -56.23904037475586, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -46.67158508300781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.37390398979187, + "rewards_train/margins": -0.34424543380737305, + "rewards_train/rejected": -2.029658555984497, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -17.443456649780273, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -36.85734558105469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4255956709384918, + "rewards_train/margins": 1.4351389110088348, + "rewards_train/rejected": -1.8607345819473267, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -76.8382797241211, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -85.25008392333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8338279724121094, + "rewards_train/margins": 0.4411804676055908, + "rewards_train/rejected": -1.2750084400177002, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -162.677001953125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -216.0, + "logps_train/rejected": -228.2550811767578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7677001953125, + "rewards_train/margins": -0.5421921014785767, + "rewards_train/rejected": -1.2255080938339233, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -90.82781982421875, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -92.59686279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.782781958580017, + "rewards_train/margins": 1.0269044637680054, + "rewards_train/rejected": -2.8096864223480225, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -36.979957580566406, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -53.177284240722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.166745901107788, + "rewards_train/margins": 1.0259826183319092, + "rewards_train/rejected": -3.1927285194396973, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -7.829294204711914, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -13.308774948120117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07667942345142365, + "rewards_train/margins": 0.047948069870471954, + "rewards_train/rejected": -0.1246274933218956, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -14.107402801513672, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -25.21409797668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5232402682304382, + "rewards_train/margins": 0.6606695055961609, + "rewards_train/rejected": -1.1839097738265991, + "step": 2158 + }, + { + "epoch": 0.6, + "logps_train/chosen": -41.23973083496094, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -27.06174659729004, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8114731311798096, + "rewards_train/margins": 0.5993890762329102, + "rewards_train/rejected": -2.4108622074127197, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.872791290283203, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -1.7578125, + "logps_train/rejected": -19.437746047973633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6372791528701782, + "rewards_train/margins": 0.13071417808532715, + "rewards_train/rejected": -1.7679933309555054, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -23.731807708740234, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -69.33329772949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5169308185577393, + "rewards_train/margins": 3.6038992404937744, + "rewards_train/rejected": -5.120830059051514, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -194.54598999023438, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -247.78038024902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.454598903656006, + "rewards_train/margins": 5.773438930511475, + "rewards_train/rejected": -12.22803783416748, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -220.35006713867188, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -189.6665802001953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.735006809234619, + "rewards_train/margins": -1.468348741531372, + "rewards_train/rejected": -2.266658067703247, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -89.67900085449219, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -175.04739379882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9679001569747925, + "rewards_train/margins": 4.086839318275452, + "rewards_train/rejected": -6.054739475250244, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -134.3572998046875, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -153.08880615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3357300758361816, + "rewards_train/margins": 1.3731508255004883, + "rewards_train/rejected": -4.70888090133667, + "step": 2159 + }, + { + "epoch": 0.6, + "logps_train/chosen": -279.82696533203125, + "logps_train/ref_chosen": -224.0, + "logps_train/ref_rejected": -211.0, + "logps_train/rejected": -270.20574951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.582696437835693, + "rewards_train/margins": 0.3378787040710449, + "rewards_train/rejected": -5.920575141906738, + "step": 2159 + }, + { + "epoch": 0.6, + "learning_rate": 9.944148486097792e-08, + "loss": 0.4903, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -54.84477996826172, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -0.83984375, + "logps_train/rejected": -18.190940856933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14052200317382812, + "rewards_train/margins": 1.8756316900253296, + "rewards_train/rejected": -1.7351096868515015, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -20.706151962280273, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -17.146318435668945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.579990267753601, + "rewards_train/margins": -0.4966083765029907, + "rewards_train/rejected": -1.0833818912506104, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -178.51266479492188, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -187.03842163085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.7512664794921875, + "rewards_train/margins": 2.2525758743286133, + "rewards_train/rejected": -7.003842353820801, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -257.0326843261719, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -216.2229461669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.603268623352051, + "rewards_train/margins": 0.7190260887145996, + "rewards_train/rejected": -7.32229471206665, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.647232055664062, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -23.0081787109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5084732174873352, + "rewards_train/margins": -0.07015535235404968, + "rewards_train/rejected": -0.4383178651332855, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -109.14935302734375, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -144.9793701171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.664935290813446, + "rewards_train/margins": 3.9330018162727356, + "rewards_train/rejected": -4.597937107086182, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -77.56245422363281, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -85.31697082519531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6062453985214233, + "rewards_train/margins": 2.700451970100403, + "rewards_train/rejected": -4.306697368621826, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -19.52836036682129, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -53.40191650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.155961036682129, + "rewards_train/margins": 1.2967307567596436, + "rewards_train/rejected": -2.4526917934417725, + "step": 2160 + }, + { + "epoch": 0.6, + "logps_train/chosen": -9.059248924255371, + "logps_train/ref_chosen": -0.53125, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -16.23192024230957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8527998924255371, + "rewards_train/margins": 0.282892107963562, + "rewards_train/rejected": -1.1356920003890991, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -155.29925537109375, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -183.39547729492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.929925560951233, + "rewards_train/margins": 2.5096224546432495, + "rewards_train/rejected": -4.439548015594482, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -43.79798126220703, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -184.3465576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0797981023788452, + "rewards_train/margins": 5.854857563972473, + "rewards_train/rejected": -6.934655666351318, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -206.05599975585938, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -208.04710388183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.805600166320801, + "rewards_train/margins": 0.09911012649536133, + "rewards_train/rejected": -6.904710292816162, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -13.126252174377441, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -37.146400451660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7657502293586731, + "rewards_train/margins": 1.1738898158073425, + "rewards_train/rejected": -1.9396400451660156, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -78.3242416381836, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -111.87586212158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.107424259185791, + "rewards_train/margins": 1.8301620483398438, + "rewards_train/rejected": -3.9375863075256348, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -10.28268814086914, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -32.92177963256836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4407688081264496, + "rewards_train/margins": 1.1389091312885284, + "rewards_train/rejected": -1.579677939414978, + "step": 2161 + }, + { + "epoch": 0.6, + "logps_train/chosen": -149.9268798828125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -225.7359161376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3926880359649658, + "rewards_train/margins": 1.380903720855713, + "rewards_train/rejected": -2.7735917568206787, + "step": 2161 + }, + { + "epoch": 0.6, + "learning_rate": 9.829452424312379e-08, + "loss": 0.3045, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -272.546630859375, + "logps_train/ref_chosen": -235.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -224.47865295410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7546632289886475, + "rewards_train/margins": 3.393202066421509, + "rewards_train/rejected": -7.147865295410156, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -140.73211669921875, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -130.19448852539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4732117652893066, + "rewards_train/margins": 1.7462372779846191, + "rewards_train/rejected": -5.219449043273926, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -16.802200317382812, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -22.535282135009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6989700198173523, + "rewards_train/margins": 0.723308265209198, + "rewards_train/rejected": -1.4222782850265503, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -21.739116668701172, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -21.593961715698242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6176616549491882, + "rewards_train/margins": -0.333265483379364, + "rewards_train/rejected": -0.2843961715698242, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -9.20115852355957, + "logps_train/ref_chosen": -3.453125, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -6.246247291564941, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.574803352355957, + "rewards_train/margins": -0.1829911172389984, + "rewards_train/rejected": -0.3918122351169586, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -260.8155822753906, + "logps_train/ref_chosen": -215.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -195.50259399414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.5815582275390625, + "rewards_train/margins": -1.4312987327575684, + "rewards_train/rejected": -3.150259494781494, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -23.14274787902832, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -24.569904327392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.739274799823761, + "rewards_train/margins": 0.34271568059921265, + "rewards_train/rejected": -1.0819904804229736, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -13.31994915008545, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -29.399127960205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0819949135184288, + "rewards_train/margins": 1.770417906343937, + "rewards_train/rejected": -1.8524128198623657, + "step": 2162 + }, + { + "epoch": 0.6, + "logps_train/chosen": -17.67255973815918, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -28.858022689819336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.420244038105011, + "rewards_train/margins": 1.2935463190078735, + "rewards_train/rejected": -0.8733022809028625, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -224.9981231689453, + "logps_train/ref_chosen": -185.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -204.13050842285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.999812364578247, + "rewards_train/margins": -0.18676137924194336, + "rewards_train/rejected": -3.8130509853363037, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -115.40550231933594, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -167.65074157714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7905502319335938, + "rewards_train/margins": 4.024524211883545, + "rewards_train/rejected": -5.815074443817139, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -55.50801086425781, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -55.0, + "logps_train/rejected": -108.14461517333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.325801134109497, + "rewards_train/margins": 3.9886605739593506, + "rewards_train/rejected": -5.314461708068848, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -114.09123229980469, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -115.2442855834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.209123373031616, + "rewards_train/margins": 1.2653052806854248, + "rewards_train/rejected": -4.474428653717041, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -99.79546356201172, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -190.12608337402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0295464992523193, + "rewards_train/margins": 3.333061933517456, + "rewards_train/rejected": -6.362608432769775, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -24.553417205810547, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -29.903827667236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5178417563438416, + "rewards_train/margins": 1.572541058063507, + "rewards_train/rejected": -2.0903828144073486, + "step": 2163 + }, + { + "epoch": 0.6, + "logps_train/chosen": -16.962980270385742, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -46.76260757446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6587980389595032, + "rewards_train/margins": 2.07996267080307, + "rewards_train/rejected": -2.7387607097625732, + "step": 2163 + }, + { + "epoch": 0.6, + "learning_rate": 9.715387460757507e-08, + "loss": 0.3906, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -25.828861236572266, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -34.24213790893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9203861355781555, + "rewards_train/margins": 0.678827702999115, + "rewards_train/rejected": -1.5992138385772705, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -44.150482177734375, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -49.99386978149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6900482177734375, + "rewards_train/margins": 3.849963665008545, + "rewards_train/rejected": -4.540011882781982, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -11.953435897827148, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -41.920040130615234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25159358978271484, + "rewards_train/margins": 1.5279104709625244, + "rewards_train/rejected": -1.7795040607452393, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -112.49535369873047, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -237.42462158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.349535346031189, + "rewards_train/margins": 6.492926716804504, + "rewards_train/rejected": -7.842462062835693, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -22.298507690429688, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -40.716835021972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7517257928848267, + "rewards_train/margins": 1.9340201616287231, + "rewards_train/rejected": -3.68574595451355, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -177.1403045654297, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -44.74525451660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.814030408859253, + "rewards_train/margins": 0.3167450428009033, + "rewards_train/rejected": -3.1307754516601562, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -107.48819732666016, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -169.15011596679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0488197803497314, + "rewards_train/margins": 5.616192102432251, + "rewards_train/rejected": -6.665011882781982, + "step": 2164 + }, + { + "epoch": 0.6, + "logps_train/chosen": -58.95326232910156, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -33.519248962402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3703262507915497, + "rewards_train/margins": 1.9690985977649689, + "rewards_train/rejected": -2.3394248485565186, + "step": 2164 + }, + { + "epoch": 0.61, + "logps_train/chosen": -25.631637573242188, + "logps_train/ref_chosen": -0.984375, + "logps_train/ref_rejected": -2.46875, + "logps_train/rejected": -6.138330459594727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.464726209640503, + "rewards_train/margins": -2.097768157720566, + "rewards_train/rejected": -0.36695805191993713, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -22.274303436279297, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -1.1796875, + "logps_train/rejected": -6.086060047149658, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6711803674697876, + "rewards_train/margins": -0.18054309487342834, + "rewards_train/rejected": -0.49063727259635925, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -186.27076721191406, + "logps_train/ref_chosen": -175.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -202.94784545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1270767450332642, + "rewards_train/margins": 3.9677079916000366, + "rewards_train/rejected": -5.094784736633301, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -47.833953857421875, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -87.58680725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4333953857421875, + "rewards_train/margins": 1.2752853631973267, + "rewards_train/rejected": -1.7086807489395142, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -30.403079986572266, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -47.02723693847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2778080701828003, + "rewards_train/margins": 2.8280407190322876, + "rewards_train/rejected": -4.105848789215088, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -61.27881622314453, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -42.18580627441406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.240381717681885, + "rewards_train/margins": -2.9093010425567627, + "rewards_train/rejected": -1.331080675125122, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -37.79705047607422, + "logps_train/ref_chosen": -26.0, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -41.69647216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.179705023765564, + "rewards_train/margins": 2.002442240715027, + "rewards_train/rejected": -3.182147264480591, + "step": 2165 + }, + { + "epoch": 0.61, + "logps_train/chosen": -169.53988647460938, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -214.78131103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.3539886474609375, + "rewards_train/margins": 3.9741430282592773, + "rewards_train/rejected": -10.328131675720215, + "step": 2165 + }, + { + "epoch": 0.61, + "learning_rate": 9.601954393766975e-08, + "loss": 0.4913, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -13.244616508483887, + "logps_train/ref_chosen": -4.15625, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -11.964241027832031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9088366627693176, + "rewards_train/margins": -0.22491252422332764, + "rewards_train/rejected": -0.68392413854599, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -144.1354522705078, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -163.015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9135453701019287, + "rewards_train/margins": 1.0380172729492188, + "rewards_train/rejected": -3.9515626430511475, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -4.972073554992676, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -14.304618835449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34330111742019653, + "rewards_train/margins": 0.7371607422828674, + "rewards_train/rejected": -1.080461859703064, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -109.78620147705078, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -225.3683319091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6786202192306519, + "rewards_train/margins": 8.058213353157043, + "rewards_train/rejected": -9.736833572387695, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -296.7647705078125, + "logps_train/ref_chosen": -215.0, + "logps_train/ref_rejected": -186.0, + "logps_train/rejected": -277.64764404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.176477432250977, + "rewards_train/margins": 0.9882869720458984, + "rewards_train/rejected": -9.164764404296875, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -122.09706115722656, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -125.21952819824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.359706163406372, + "rewards_train/margins": 0.31224679946899414, + "rewards_train/rejected": -2.671952962875366, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -14.015081405639648, + "logps_train/ref_chosen": -13.625, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -81.5696792602539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.039008140563964844, + "rewards_train/margins": 3.8929598331451416, + "rewards_train/rejected": -3.9319679737091064, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -96.13119506835938, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -94.35685729980469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2131195068359375, + "rewards_train/margins": -0.17743372917175293, + "rewards_train/rejected": -3.0356857776641846, + "step": 2166 + }, + { + "epoch": 0.61, + "logps_train/chosen": -212.44921875, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -230.0, + "logps_train/rejected": -360.90234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.644921779632568, + "rewards_train/margins": 8.445312976837158, + "rewards_train/rejected": -13.090234756469727, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -68.34585571289062, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -66.99898529052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7345855832099915, + "rewards_train/margins": 3.165312945842743, + "rewards_train/rejected": -3.8998985290527344, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -50.86311721801758, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -5.6875, + "logps_train/rejected": -43.63931655883789, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.905061721801758, + "rewards_train/margins": -0.10987997055053711, + "rewards_train/rejected": -3.7951817512512207, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -267.0436706542969, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -224.8166961669922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.004366874694824, + "rewards_train/margins": -0.7226972579956055, + "rewards_train/rejected": -9.281669616699219, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -0.017350073903799057, + "logps_train/ref_chosen": -0.3359375, + "logps_train/ref_rejected": -3.6875, + "logps_train/rejected": -9.029244422912598, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.031858742237091064, + "rewards_train/margins": 0.5660331845283508, + "rewards_train/rejected": -0.5341744422912598, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -50.044246673583984, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -271.17889404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2044247388839722, + "rewards_train/margins": 12.61346447467804, + "rewards_train/rejected": -13.817889213562012, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -157.90908813476562, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -165.85433959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9909088611602783, + "rewards_train/margins": 1.294525384902954, + "rewards_train/rejected": -5.285434246063232, + "step": 2167 + }, + { + "epoch": 0.61, + "logps_train/chosen": -16.426828384399414, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -47.489540100097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1270579099655151, + "rewards_train/margins": 1.9343961477279663, + "rewards_train/rejected": -3.0614540576934814, + "step": 2167 + }, + { + "epoch": 0.61, + "learning_rate": 9.489154017251977e-08, + "loss": 0.3697, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -103.90423583984375, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -157.5645294189453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.140423536300659, + "rewards_train/margins": 3.216029405593872, + "rewards_train/rejected": -5.356452941894531, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -3.37384033203125, + "logps_train/ref_chosen": -1.6796875, + "logps_train/ref_rejected": -1.6796875, + "logps_train/rejected": -3.645730972290039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16941528022289276, + "rewards_train/margins": 0.02718907594680786, + "rewards_train/rejected": -0.19660435616970062, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -154.0723876953125, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -174.05029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9572389125823975, + "rewards_train/margins": 2.847790479660034, + "rewards_train/rejected": -5.805029392242432, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -42.398834228515625, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -71.78671264648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7523834705352783, + "rewards_train/margins": 1.8762879371643066, + "rewards_train/rejected": -3.628671407699585, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -41.397422790527344, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -35.68968200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7147423028945923, + "rewards_train/margins": 0.3417259454727173, + "rewards_train/rejected": -1.0564682483673096, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.21327018737793, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -84.62957763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7963270545005798, + "rewards_train/margins": 6.0541306138038635, + "rewards_train/rejected": -6.850457668304443, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.25802230834961, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -37.549278259277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9633022546768188, + "rewards_train/margins": 2.0728756189346313, + "rewards_train/rejected": -3.03617787361145, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -104.96849060058594, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -209.04432678222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4468491077423096, + "rewards_train/margins": 6.057583570480347, + "rewards_train/rejected": -8.504432678222656, + "step": 2168 + }, + { + "epoch": 0.61, + "logps_train/chosen": -148.7877960205078, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -171.2188262939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.078779697418213, + "rewards_train/margins": 2.1931028366088867, + "rewards_train/rejected": -6.2718825340271, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -17.06160545349121, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -22.64598274230957, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.378035545349121, + "rewards_train/margins": -0.5696872472763062, + "rewards_train/rejected": -0.8083482980728149, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -37.670372009277344, + "logps_train/ref_chosen": -4.125, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -33.0719108581543, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.35453724861145, + "rewards_train/margins": -0.569221019744873, + "rewards_train/rejected": -2.785316228866577, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -127.97900390625, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -166.95045471191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.997900485992432, + "rewards_train/margins": 1.047144889831543, + "rewards_train/rejected": -6.045045375823975, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -172.779052734375, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -138.5939178466797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.777905464172363, + "rewards_train/margins": -2.2685136795043945, + "rewards_train/rejected": -5.509391784667969, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -184.45355224609375, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -191.43397521972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.745355129241943, + "rewards_train/margins": 0.04804229736328125, + "rewards_train/rejected": -7.793397426605225, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -0.8724322319030762, + "logps_train/ref_chosen": -0.81640625, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -16.494998931884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005602598190307617, + "rewards_train/margins": 0.4688973128795624, + "rewards_train/rejected": -0.47449991106987, + "step": 2169 + }, + { + "epoch": 0.61, + "logps_train/chosen": -12.09492301940918, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -19.98031234741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.253007709980011, + "rewards_train/margins": 1.1260389685630798, + "rewards_train/rejected": -0.8730312585830688, + "step": 2169 + }, + { + "epoch": 0.61, + "learning_rate": 9.376987120695545e-08, + "loss": 0.4888, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -90.51406860351562, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -95.220703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5514068603515625, + "rewards_train/margins": 1.670663595199585, + "rewards_train/rejected": -3.2220704555511475, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -121.21373748779297, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -201.61627197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8713737726211548, + "rewards_train/margins": 3.9902533292770386, + "rewards_train/rejected": -5.861627101898193, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -11.630074501037598, + "logps_train/ref_chosen": -1.7421875, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -10.891059875488281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9887887239456177, + "rewards_train/margins": -0.6309327185153961, + "rewards_train/rejected": -0.35785600543022156, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -105.28976440429688, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -144.12733459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5289764404296875, + "rewards_train/margins": 3.7337570190429688, + "rewards_train/rejected": -6.262733459472656, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -82.28910064697266, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -180.4600830078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8539100885391235, + "rewards_train/margins": 4.942098498344421, + "rewards_train/rejected": -6.796008586883545, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -4.430972576141357, + "logps_train/ref_chosen": -3.328125, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -40.58642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11028476059436798, + "rewards_train/margins": 1.8983579128980637, + "rewards_train/rejected": -2.0086426734924316, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -82.7763442993164, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -119.19145202636719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.327634572982788, + "rewards_train/margins": 0.1415107250213623, + "rewards_train/rejected": -3.4691452980041504, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -24.286209106445312, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -34.572444915771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.302839756011963, + "rewards_train/margins": 0.2044048309326172, + "rewards_train/rejected": -2.50724458694458, + "step": 2170 + }, + { + "epoch": 0.61, + "logps_train/chosen": -10.249273300170898, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -20.034997940063477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6764898300170898, + "rewards_train/margins": 0.2520099878311157, + "rewards_train/rejected": -0.9284998178482056, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -190.58349609375, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -191.67230224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.358349800109863, + "rewards_train/margins": 2.8588805198669434, + "rewards_train/rejected": -7.217230319976807, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -16.658836364746094, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -27.161418914794922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.397133708000183, + "rewards_train/margins": 0.8033832311630249, + "rewards_train/rejected": -2.200516939163208, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -117.924072265625, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -117.60714721679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0924072265625, + "rewards_train/margins": -0.0316925048828125, + "rewards_train/rejected": -1.0607147216796875, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.323122024536133, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -29.26708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6948121786117554, + "rewards_train/margins": 0.3756469488143921, + "rewards_train/rejected": -2.0704591274261475, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -69.70979309082031, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -199.45663452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6959792971611023, + "rewards_train/margins": 2.5496841073036194, + "rewards_train/rejected": -3.2456634044647217, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.237245559692383, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -24.09335708618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5674745440483093, + "rewards_train/margins": 0.7293612360954285, + "rewards_train/rejected": -1.2968357801437378, + "step": 2171 + }, + { + "epoch": 0.61, + "logps_train/chosen": -140.48477172851562, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -173.94789123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8484771847724915, + "rewards_train/margins": 4.046312034130096, + "rewards_train/rejected": -4.894789218902588, + "step": 2171 + }, + { + "epoch": 0.61, + "learning_rate": 9.265454489147051e-08, + "loss": 0.3347, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -20.34525489807129, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -21.579023361206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6782755255699158, + "rewards_train/margins": 1.1780642867088318, + "rewards_train/rejected": -1.8563398122787476, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -33.57694625854492, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -20.125, + "logps_train/rejected": -59.85276412963867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.420194625854492, + "rewards_train/margins": 1.552581787109375, + "rewards_train/rejected": -3.972776412963867, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -0.4028263986110687, + "logps_train/ref_chosen": -0.64453125, + "logps_train/ref_rejected": -0.64453125, + "logps_train/rejected": -0.42792201042175293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024170486256480217, + "rewards_train/margins": 0.00250956229865551, + "rewards_train/rejected": 0.021660923957824707, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -155.9740753173828, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -225.754638671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2974075078964233, + "rewards_train/margins": 5.078056454658508, + "rewards_train/rejected": -6.375463962554932, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -150.593505859375, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -197.69696044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6093506813049316, + "rewards_train/margins": 4.260345458984375, + "rewards_train/rejected": -6.869696140289307, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -22.041465759277344, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -1.7421875, + "logps_train/rejected": -27.24493408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7135215997695923, + "rewards_train/margins": 0.8367530107498169, + "rewards_train/rejected": -2.550274610519409, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -129.3822479248047, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -171.31784057617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3882248401641846, + "rewards_train/margins": 2.9935591220855713, + "rewards_train/rejected": -4.381783962249756, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -157.51898193359375, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -185.5762481689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.851898193359375, + "rewards_train/margins": 3.155726432800293, + "rewards_train/rejected": -9.007624626159668, + "step": 2172 + }, + { + "epoch": 0.61, + "logps_train/chosen": -10.647951126098633, + "logps_train/ref_chosen": -2.3125, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -7.437817096710205, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8335451483726501, + "rewards_train/margins": -0.4991384446620941, + "rewards_train/rejected": -0.33440670371055603, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.402769088745117, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -36.19088363647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.009723091498017311, + "rewards_train/margins": 1.9038114789873362, + "rewards_train/rejected": -1.8940883874893188, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -8.123686790466309, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -29.098731994628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47330617904663086, + "rewards_train/margins": 1.7053170204162598, + "rewards_train/rejected": -2.1786231994628906, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -160.42930603027344, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -170.34596252441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9429305791854858, + "rewards_train/margins": 2.2916656732559204, + "rewards_train/rejected": -4.234596252441406, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -147.78440856933594, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -243.49404907226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2784408628940582, + "rewards_train/margins": 5.170964330434799, + "rewards_train/rejected": -5.449405193328857, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -18.772308349609375, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -1.2265625, + "logps_train/rejected": -18.657007217407227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5584808588027954, + "rewards_train/margins": 0.18456363677978516, + "rewards_train/rejected": -1.7430444955825806, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -31.300424575805664, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -46.55511474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0675424337387085, + "rewards_train/margins": 1.337969183921814, + "rewards_train/rejected": -2.4055116176605225, + "step": 2173 + }, + { + "epoch": 0.61, + "logps_train/chosen": -34.62027359008789, + "logps_train/ref_chosen": -32.75, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -88.46337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18702736496925354, + "rewards_train/margins": 2.5593105256557465, + "rewards_train/rejected": -2.746337890625, + "step": 2173 + }, + { + "epoch": 0.61, + "learning_rate": 9.15455690321667e-08, + "loss": 0.2448, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -29.324901580810547, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -32.747623443603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.232490301132202, + "rewards_train/margins": 0.38602209091186523, + "rewards_train/rejected": -2.6185123920440674, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -105.1629867553711, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -206.31036376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0162986516952515, + "rewards_train/margins": 8.514737725257874, + "rewards_train/rejected": -9.531036376953125, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -25.472091674804688, + "logps_train/ref_chosen": -7.46875, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -150.7632293701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8003342151641846, + "rewards_train/margins": 5.2259886264801025, + "rewards_train/rejected": -7.026322841644287, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.211353302001953, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -44.49795913696289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9992603063583374, + "rewards_train/margins": 0.6005357503890991, + "rewards_train/rejected": -2.5997960567474365, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -79.73046875, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -33.20024108886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.376953125, + "rewards_train/margins": 2.2282272577285767, + "rewards_train/rejected": -1.8512741327285767, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -17.5526065826416, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -29.894756317138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0865106582641602, + "rewards_train/margins": 0.24046504497528076, + "rewards_train/rejected": -1.326975703239441, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -27.876888275146484, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -21.955799102783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7251888513565063, + "rewards_train/margins": 0.18757855892181396, + "rewards_train/rejected": -1.9127674102783203, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -1.3919626474380493, + "logps_train/ref_chosen": -0.98046875, + "logps_train/ref_rejected": -1.6328125, + "logps_train/rejected": -31.81181526184082, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04114938899874687, + "rewards_train/margins": 2.9767508395016193, + "rewards_train/rejected": -3.017900228500366, + "step": 2174 + }, + { + "epoch": 0.61, + "logps_train/chosen": -13.271037101745605, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -53.5, + "logps_train/rejected": -84.37908935546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2771037220954895, + "rewards_train/margins": 2.8108052611351013, + "rewards_train/rejected": -3.087908983230591, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -29.22456169128418, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -34.33759307861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4162062406539917, + "rewards_train/margins": 1.239428162574768, + "rewards_train/rejected": -2.6556344032287598, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -5.779240608215332, + "logps_train/ref_chosen": -0.06103515625, + "logps_train/ref_rejected": -0.06103515625, + "logps_train/rejected": -5.780856609344482, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5718205571174622, + "rewards_train/margins": 0.00016158819198608398, + "rewards_train/rejected": -0.5719821453094482, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -128.4033203125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -147.4142608642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.09033203125, + "rewards_train/margins": 2.0510940551757812, + "rewards_train/rejected": -5.141426086425781, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -88.61495208740234, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -237.0, + "logps_train/rejected": -238.1851806640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3614952266216278, + "rewards_train/margins": -0.24297715723514557, + "rewards_train/rejected": -0.11851806938648224, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -15.873838424682617, + "logps_train/ref_chosen": -3.625, + "logps_train/ref_rejected": -19.875, + "logps_train/rejected": -30.486814498901367, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2248839139938354, + "rewards_train/margins": -0.16370248794555664, + "rewards_train/rejected": -1.0611814260482788, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -71.10255432128906, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -136.92652893066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.33525550365448, + "rewards_train/margins": 4.657397389411926, + "rewards_train/rejected": -5.992652893066406, + "step": 2175 + }, + { + "epoch": 0.61, + "logps_train/chosen": -27.89138412475586, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -29.237930297851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8141384124755859, + "rewards_train/margins": 0.42215466499328613, + "rewards_train/rejected": -1.236293077468872, + "step": 2175 + }, + { + "epoch": 0.61, + "learning_rate": 9.044295139069913e-08, + "loss": 0.3459, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -238.8370361328125, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -137.23135375976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.283703804016113, + "rewards_train/margins": -1.4105684757232666, + "rewards_train/rejected": -3.8731353282928467, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -0.041484516113996506, + "logps_train/ref_chosen": -0.09765625, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -9.395952224731445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005617173388600349, + "rewards_train/margins": 0.2733373958617449, + "rewards_train/rejected": -0.26772022247314453, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.570971488952637, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -27.261655807495117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6711596846580505, + "rewards_train/margins": 1.080005943775177, + "rewards_train/rejected": -1.7511656284332275, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -15.94967269897461, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -38.45779037475586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06371726840734482, + "rewards_train/margins": 1.9820617213845253, + "rewards_train/rejected": -2.04577898979187, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -204.852294921875, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -230.48036193847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.185229778289795, + "rewards_train/margins": -0.23719358444213867, + "rewards_train/rejected": -5.948036193847656, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -217.64376831054688, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -199.0856475830078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.964376926422119, + "rewards_train/margins": -0.9558119773864746, + "rewards_train/rejected": -5.0085649490356445, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -63.661766052246094, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -164.46890258789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.578676700592041, + "rewards_train/margins": 5.668213367462158, + "rewards_train/rejected": -9.2468900680542, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -46.24478530883789, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -32.60634231567383, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6494786739349365, + "rewards_train/margins": -1.551344394683838, + "rewards_train/rejected": -1.0981342792510986, + "step": 2176 + }, + { + "epoch": 0.61, + "logps_train/chosen": -5.927955627441406, + "logps_train/ref_chosen": -0.52734375, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -36.37567138671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5400611758232117, + "rewards_train/margins": 2.5318809151649475, + "rewards_train/rejected": -3.071942090988159, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -78.90827178955078, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -50.368988037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4408272504806519, + "rewards_train/margins": 2.4398216009140015, + "rewards_train/rejected": -3.8806488513946533, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -84.25782775878906, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -146.0194091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1257827281951904, + "rewards_train/margins": 7.401158571243286, + "rewards_train/rejected": -9.526941299438477, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -230.04849243164062, + "logps_train/ref_chosen": -202.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -178.0223388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.80484938621521, + "rewards_train/margins": 0.19738459587097168, + "rewards_train/rejected": -3.0022339820861816, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -123.33016967773438, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -201.7312469482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.883017063140869, + "rewards_train/margins": 3.190107822418213, + "rewards_train/rejected": -8.073124885559082, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -119.1688003540039, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -179.80169677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0168800354003906, + "rewards_train/margins": 4.363289833068848, + "rewards_train/rejected": -6.380169868469238, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -207.6263427734375, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -197.05203247070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.162634372711182, + "rewards_train/margins": 0.2925691604614258, + "rewards_train/rejected": -7.455203533172607, + "step": 2177 + }, + { + "epoch": 0.61, + "logps_train/chosen": -27.50545883178711, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -10.443283081054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5255459547042847, + "rewards_train/margins": -0.7249676585197449, + "rewards_train/rejected": -0.8005782961845398, + "step": 2177 + }, + { + "epoch": 0.61, + "learning_rate": 8.934669968422204e-08, + "loss": 0.5595, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -143.79379272460938, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -207.324462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4293792247772217, + "rewards_train/margins": 3.8030669689178467, + "rewards_train/rejected": -7.232446193695068, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -155.4344482421875, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -254.77035522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.743444800376892, + "rewards_train/margins": 7.733590722084045, + "rewards_train/rejected": -9.477035522460938, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -162.9105987548828, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -88.18600463867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.041059970855713, + "rewards_train/margins": -3.3974595069885254, + "rewards_train/rejected": -2.6436004638671875, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -1.0212070941925049, + "logps_train/ref_chosen": -1.078125, + "logps_train/ref_rejected": -1.078125, + "logps_train/rejected": -1.0485116243362427, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005691790487617254, + "rewards_train/margins": 0.002730452921241522, + "rewards_train/rejected": 0.0029613375663757324, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -30.560070037841797, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -60.7381477355957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.412256956100464, + "rewards_train/margins": 2.3053081035614014, + "rewards_train/rejected": -4.717565059661865, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -30.123706817626953, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -12.372047424316406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0373706817626953, + "rewards_train/margins": -0.07829093933105469, + "rewards_train/rejected": -0.9590797424316406, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -14.710858345031738, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -40.65140914916992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5023358464241028, + "rewards_train/margins": 1.9628052115440369, + "rewards_train/rejected": -2.4651410579681396, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -230.58621215820312, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -321.9034423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.358621597290039, + "rewards_train/margins": 7.831722259521484, + "rewards_train/rejected": -16.190343856811523, + "step": 2178 + }, + { + "epoch": 0.61, + "logps_train/chosen": -172.17837524414062, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -238.74942016601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0178375244140625, + "rewards_train/margins": 4.6571044921875, + "rewards_train/rejected": -7.6749420166015625, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -119.3829345703125, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -124.23519897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0882935523986816, + "rewards_train/margins": 1.8352265357971191, + "rewards_train/rejected": -4.923520088195801, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -34.597747802734375, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -20.197792053222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8597748279571533, + "rewards_train/margins": -0.9962456226348877, + "rewards_train/rejected": -0.8635292053222656, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -99.4706039428711, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -112.58845520019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.497060537338257, + "rewards_train/margins": 2.0117852687835693, + "rewards_train/rejected": -4.508845806121826, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -32.60226058959961, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -55.41242218017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7852261066436768, + "rewards_train/margins": 2.006016254425049, + "rewards_train/rejected": -3.7912423610687256, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -44.207950592041016, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -109.28776550292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2457951307296753, + "rewards_train/margins": 1.532981514930725, + "rewards_train/rejected": -2.7787766456604004, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -122.58002471923828, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -145.22677612304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4580025672912598, + "rewards_train/margins": 1.164675235748291, + "rewards_train/rejected": -4.622677803039551, + "step": 2179 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.852548599243164, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -11.75, + "logps_train/rejected": -40.956085205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9727548360824585, + "rewards_train/margins": 0.947853684425354, + "rewards_train/rejected": -2.9206085205078125, + "step": 2179 + }, + { + "epoch": 0.61, + "learning_rate": 8.825682158533553e-08, + "loss": 0.4761, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -40.36163330078125, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -68.92533874511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.588836669921875, + "rewards_train/margins": 1.3563705682754517, + "rewards_train/rejected": -0.7675338983535767, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -51.30990219116211, + "logps_train/ref_chosen": -37.5, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -72.54840850830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3809902667999268, + "rewards_train/margins": 2.6988508701324463, + "rewards_train/rejected": -4.079841136932373, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -0.0060736034065485, + "logps_train/ref_chosen": -0.055908203125, + "logps_train/ref_rejected": -0.055908203125, + "logps_train/rejected": -0.007209255825728178, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0049834600649774075, + "rewards_train/margins": 0.00011356547474861145, + "rewards_train/rejected": 0.004869894590228796, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -8.710285186767578, + "logps_train/ref_chosen": -3.265625, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -16.78815269470215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5444660186767578, + "rewards_train/margins": -0.14065074920654297, + "rewards_train/rejected": -0.40381526947021484, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -29.910181045532227, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -42.969566345214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8160181045532227, + "rewards_train/margins": 3.165313482284546, + "rewards_train/rejected": -3.9813315868377686, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -43.92628479003906, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -33.005367279052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.948878526687622, + "rewards_train/margins": 0.136033296585083, + "rewards_train/rejected": -3.084911823272705, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -59.93605422973633, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -73.881591796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.956394612789154, + "rewards_train/margins": 3.3445538878440857, + "rewards_train/rejected": -2.3881592750549316, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -18.14298439025879, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -31.938262939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7330484390258789, + "rewards_train/margins": 1.7826528549194336, + "rewards_train/rejected": -2.5157012939453125, + "step": 2180 + }, + { + "epoch": 0.61, + "logps_train/chosen": -2.0144057273864746, + "logps_train/ref_chosen": -0.421875, + "logps_train/ref_rejected": -0.421875, + "logps_train/rejected": -2.0576493740081787, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1592530757188797, + "rewards_train/margins": 0.004324361681938171, + "rewards_train/rejected": -0.16357743740081787, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -159.65447998046875, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -321.3219299316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.965447902679443, + "rewards_train/margins": 6.466744899749756, + "rewards_train/rejected": -11.4321928024292, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -3.190600872039795, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -28.363744735717773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.029997587203979492, + "rewards_train/margins": 1.0563769340515137, + "rewards_train/rejected": -1.0863745212554932, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -6.290666580200195, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -5.710800647735596, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.25406667590141296, + "rewards_train/margins": -0.19548660889267921, + "rewards_train/rejected": -0.05858006700873375, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -96.00039672851562, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -192.94140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5000396966934204, + "rewards_train/margins": 5.4441012144088745, + "rewards_train/rejected": -6.944140911102295, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -7.127719879150391, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -30.273563385009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2497280091047287, + "rewards_train/margins": 2.220834419131279, + "rewards_train/rejected": -1.9711064100265503, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -167.35012817382812, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -143.41595458984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.085012912750244, + "rewards_train/margins": -1.4434175491333008, + "rewards_train/rejected": -5.641595363616943, + "step": 2181 + }, + { + "epoch": 0.61, + "logps_train/chosen": -107.74649047851562, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -208.15353393554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4746490716934204, + "rewards_train/margins": 8.64070451259613, + "rewards_train/rejected": -10.11535358428955, + "step": 2181 + }, + { + "epoch": 0.61, + "learning_rate": 8.717332472203032e-08, + "loss": 0.3853, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -58.010414123535156, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -47.56568145751953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0135414600372314, + "rewards_train/margins": -1.0319732427597046, + "rewards_train/rejected": -1.9815682172775269, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -10.252952575683594, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -30.659278869628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5127952694892883, + "rewards_train/margins": 1.0968826413154602, + "rewards_train/rejected": -1.6096779108047485, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -37.75653076171875, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -24.985090255737305, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.306903123855591, + "rewards_train/margins": -0.8083940744400024, + "rewards_train/rejected": -1.4985090494155884, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -90.04499053955078, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -113.34807586669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1544990539550781, + "rewards_train/margins": 2.830308675765991, + "rewards_train/rejected": -3.9848077297210693, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -66.5281982421875, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -115.85099029541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.527819812297821, + "rewards_train/margins": 4.107279121875763, + "rewards_train/rejected": -4.635098934173584, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -98.6950912475586, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -178.0442352294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7195091247558594, + "rewards_train/margins": 4.084914684295654, + "rewards_train/rejected": -5.804423809051514, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -36.90501022338867, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -77.52289581298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8030011653900146, + "rewards_train/margins": 2.124288320541382, + "rewards_train/rejected": -4.9272894859313965, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -194.589111328125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -200.03082275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.958911418914795, + "rewards_train/margins": 2.5941710472106934, + "rewards_train/rejected": -8.553082466125488, + "step": 2182 + }, + { + "epoch": 0.61, + "logps_train/chosen": -25.87068748474121, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -29.93885040283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.996443748474121, + "rewards_train/margins": 0.40056633949279785, + "rewards_train/rejected": -2.397010087966919, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -185.75848388671875, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -197.60409545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.775848388671875, + "rewards_train/margins": 0.9845614433288574, + "rewards_train/rejected": -7.760409832000732, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -86.91171264648438, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -81.849853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6411713361740112, + "rewards_train/margins": 3.956314206123352, + "rewards_train/rejected": -5.597485542297363, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -150.98207092285156, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -233.9954833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.498207092285156, + "rewards_train/margins": 9.151341438293457, + "rewards_train/rejected": -13.649548530578613, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -24.97467803955078, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -33.793941497802734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33496782183647156, + "rewards_train/margins": 2.338176280260086, + "rewards_train/rejected": -2.6731441020965576, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -6.321030139923096, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -3.34375, + "logps_train/rejected": -17.129714965820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24929051101207733, + "rewards_train/margins": 1.1293060332536697, + "rewards_train/rejected": -1.378596544265747, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -15.946334838867188, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -27.471309661865234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3008835017681122, + "rewards_train/margins": 0.38374748826026917, + "rewards_train/rejected": -0.6846309900283813, + "step": 2183 + }, + { + "epoch": 0.61, + "logps_train/chosen": -164.2469482421875, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -195.805908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.574695110321045, + "rewards_train/margins": 0.5058956146240234, + "rewards_train/rejected": -6.080590724945068, + "step": 2183 + }, + { + "epoch": 0.61, + "learning_rate": 8.609621667763611e-08, + "loss": 0.3306, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -83.86299133300781, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -203.44796752929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4862991571426392, + "rewards_train/margins": 7.258497595787048, + "rewards_train/rejected": -8.744796752929688, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -17.37332534790039, + "logps_train/ref_chosen": -0.8984375, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -22.43421745300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6474888324737549, + "rewards_train/margins": 0.2131204605102539, + "rewards_train/rejected": -1.8606092929840088, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -37.358154296875, + "logps_train/ref_chosen": -25.875, + "logps_train/ref_rejected": -1.3515625, + "logps_train/rejected": -18.579906463623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1483154296875, + "rewards_train/margins": 0.5745190382003784, + "rewards_train/rejected": -1.7228344678878784, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -206.53948974609375, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -204.0, + "logps_train/rejected": -301.1774597167969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.053948879241943, + "rewards_train/margins": 5.663796901702881, + "rewards_train/rejected": -9.717745780944824, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -7.023830413818359, + "logps_train/ref_chosen": -1.765625, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -14.030250549316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5258205533027649, + "rewards_train/margins": 0.4647045135498047, + "rewards_train/rejected": -0.9905250668525696, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -140.14913940429688, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -63.6728630065918, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8149139881134033, + "rewards_train/margins": 2.0773723125457764, + "rewards_train/rejected": -3.8922863006591797, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -134.40493774414062, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -169.1680908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.640493869781494, + "rewards_train/margins": 1.4263153076171875, + "rewards_train/rejected": -5.066809177398682, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -207.32098388671875, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -202.0, + "logps_train/rejected": -274.67901611328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.63209867477417, + "rewards_train/margins": -0.36419677734375, + "rewards_train/rejected": -7.26790189743042, + "step": 2184 + }, + { + "epoch": 0.61, + "logps_train/chosen": -143.75144958496094, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -222.20913696289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2751450538635254, + "rewards_train/margins": 3.9457688331604004, + "rewards_train/rejected": -6.220913887023926, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -146.30117797851562, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -182.3229522705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.130117893218994, + "rewards_train/margins": 3.352177143096924, + "rewards_train/rejected": -9.482295036315918, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -128.65013122558594, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -284.6539611816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7650132179260254, + "rewards_train/margins": 9.600383281707764, + "rewards_train/rejected": -12.365396499633789, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -193.55422973632812, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -200.4197998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8554229736328125, + "rewards_train/margins": 2.1865572929382324, + "rewards_train/rejected": -6.041980266571045, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -17.509559631347656, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -28.125, + "logps_train/rejected": -30.592309951782227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2134560346603394, + "rewards_train/margins": -0.9667250365018845, + "rewards_train/rejected": -0.2467309981584549, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -12.022093772888184, + "logps_train/ref_chosen": -1.953125, + "logps_train/ref_rejected": -1.2109375, + "logps_train/rejected": -14.311188697814941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0068968534469604, + "rewards_train/margins": 0.3031282424926758, + "rewards_train/rejected": -1.3100250959396362, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -92.97438049316406, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -143.86817932128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1474380493164062, + "rewards_train/margins": 0.9393801689147949, + "rewards_train/rejected": -4.086818218231201, + "step": 2185 + }, + { + "epoch": 0.61, + "logps_train/chosen": -3.3287081718444824, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -34.68720626831055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.185879185795784, + "rewards_train/margins": 1.6295998841524124, + "rewards_train/rejected": -1.4437206983566284, + "step": 2185 + }, + { + "epoch": 0.61, + "learning_rate": 8.502550499076688e-08, + "loss": 0.3294, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -60.727638244628906, + "logps_train/ref_chosen": -43.75, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -205.01959228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6977638006210327, + "rewards_train/margins": 6.504195809364319, + "rewards_train/rejected": -8.201959609985352, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -164.13504028320312, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -180.48123168945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.363503932952881, + "rewards_train/margins": 3.0346198081970215, + "rewards_train/rejected": -8.398123741149902, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -126.91304779052734, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -136.17779541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5913047790527344, + "rewards_train/margins": 0.6264748573303223, + "rewards_train/rejected": -4.217779636383057, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -45.999603271484375, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -86.80965423583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2249603271484375, + "rewards_train/margins": 3.1060051918029785, + "rewards_train/rejected": -4.330965518951416, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -286.7093200683594, + "logps_train/ref_chosen": -168.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -269.5029602050781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.870932579040527, + "rewards_train/margins": -0.3206367492675781, + "rewards_train/rejected": -11.55029582977295, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -183.2587890625, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -287.72564697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.925879001617432, + "rewards_train/margins": 6.046685695648193, + "rewards_train/rejected": -11.972564697265625, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.264503479003906, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -77.85954284667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15145035088062286, + "rewards_train/margins": 2.63450388610363, + "rewards_train/rejected": -2.785954236984253, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -21.392274856567383, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -23.508625030517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7173525094985962, + "rewards_train/margins": 0.13038504123687744, + "rewards_train/rejected": -1.8477375507354736, + "step": 2186 + }, + { + "epoch": 0.61, + "logps_train/chosen": -16.295188903808594, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -24.952436447143555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7607688903808594, + "rewards_train/margins": 0.859474778175354, + "rewards_train/rejected": -1.6202436685562134, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -78.04955291748047, + "logps_train/ref_chosen": -39.25, + "logps_train/ref_rejected": -12.5625, + "logps_train/rejected": -40.604183197021484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.879955291748047, + "rewards_train/margins": -1.075786828994751, + "rewards_train/rejected": -2.804168462753296, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -30.484390258789062, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -20.905319213867188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2609390020370483, + "rewards_train/margins": -0.2954070568084717, + "rewards_train/rejected": -0.9655319452285767, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -71.94824981689453, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -57.25, + "logps_train/rejected": -71.16832733154297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.469825029373169, + "rewards_train/margins": -0.07799232006072998, + "rewards_train/rejected": -1.391832709312439, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.176420211791992, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -35.55457305908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.252017021179199, + "rewards_train/margins": 0.5534403324127197, + "rewards_train/rejected": -2.805457353591919, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -140.63308715820312, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -152.16116333007812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6633087396621704, + "rewards_train/margins": -0.047192394733428955, + "rewards_train/rejected": -0.6161163449287415, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -178.706298828125, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -256.8409118652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.720630168914795, + "rewards_train/margins": 2.7634615898132324, + "rewards_train/rejected": -8.484091758728027, + "step": 2187 + }, + { + "epoch": 0.61, + "logps_train/chosen": -46.480506896972656, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -46.0, + "logps_train/rejected": -103.1047134399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.054300785064697, + "rewards_train/margins": 1.6561708450317383, + "rewards_train/rejected": -5.7104716300964355, + "step": 2187 + }, + { + "epoch": 0.61, + "learning_rate": 8.396119715526939e-08, + "loss": 0.4248, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -14.801729202270508, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -29.97068977355957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7395479083061218, + "rewards_train/margins": 1.7762711644172668, + "rewards_train/rejected": -2.5158190727233887, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.703645706176758, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -48.5, + "logps_train/rejected": -61.5374870300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0328645706176758, + "rewards_train/margins": 0.2708841562271118, + "rewards_train/rejected": -1.3037487268447876, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -12.818079948425293, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -17.477264404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6599330306053162, + "rewards_train/margins": 0.6190434098243713, + "rewards_train/rejected": -1.2789764404296875, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -18.799972534179688, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -5.84375, + "logps_train/rejected": -57.57698059082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2924972772598267, + "rewards_train/margins": 3.8808258771896362, + "rewards_train/rejected": -5.173323154449463, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -139.2832489013672, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -242.75184631347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9783248901367188, + "rewards_train/margins": 8.296859741210938, + "rewards_train/rejected": -12.275184631347656, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -47.85737609863281, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -28.18541145324707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4357376098632812, + "rewards_train/margins": -1.5234464406967163, + "rewards_train/rejected": -1.912291169166565, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -21.603317260742188, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -80.1441650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5103317499160767, + "rewards_train/margins": 5.629084944725037, + "rewards_train/rejected": -6.139416694641113, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.368282318115234, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -36.34815216064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0368282794952393, + "rewards_train/margins": 1.2792370319366455, + "rewards_train/rejected": -2.3160653114318848, + "step": 2188 + }, + { + "epoch": 0.61, + "logps_train/chosen": -207.73956298828125, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -210.1357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.97395658493042, + "rewards_train/margins": 1.139617919921875, + "rewards_train/rejected": -6.113574504852295, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -23.6507625579834, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -22.63623809814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8150762915611267, + "rewards_train/margins": 0.5922974944114685, + "rewards_train/rejected": -1.4073737859725952, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -180.56161499023438, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -240.4867401123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.106161594390869, + "rewards_train/margins": 3.4425129890441895, + "rewards_train/rejected": -9.548674583435059, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -124.67571258544922, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -141.77857971191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.2425713539123535, + "rewards_train/margins": -0.364713191986084, + "rewards_train/rejected": -5.8778581619262695, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -11.240293502807617, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -36.62810516357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4896543622016907, + "rewards_train/margins": 2.707531154155731, + "rewards_train/rejected": -3.197185516357422, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.03217124938965, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -42.35706329345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.596967101097107, + "rewards_train/margins": 2.0793641805648804, + "rewards_train/rejected": -3.6763312816619873, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -112.09980010986328, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -224.20033264160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3599800169467926, + "rewards_train/margins": 6.660053342580795, + "rewards_train/rejected": -7.020033359527588, + "step": 2189 + }, + { + "epoch": 0.61, + "logps_train/chosen": -180.47117614746094, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -177.02236938476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.247117519378662, + "rewards_train/margins": -0.04488039016723633, + "rewards_train/rejected": -5.202237129211426, + "step": 2189 + }, + { + "epoch": 0.61, + "learning_rate": 8.290330062017014e-08, + "loss": 0.3553, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -13.813654899597168, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -20.38182258605957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0423029661178589, + "rewards_train/margins": 0.47712934017181396, + "rewards_train/rejected": -1.5194323062896729, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -164.3133544921875, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -169.623046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.43133544921875, + "rewards_train/margins": 0.3309692144393921, + "rewards_train/rejected": -1.762304663658142, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -25.52570343017578, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -26.7672176361084, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.346320390701294, + "rewards_train/margins": 0.06321382522583008, + "rewards_train/rejected": -2.409534215927124, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -146.28878784179688, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -185.90826416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6788787841796875, + "rewards_train/margins": 2.611947536468506, + "rewards_train/rejected": -5.290826320648193, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -74.12310791015625, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -113.90223693847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.33768922090530396, + "rewards_train/margins": 0.07791289687156677, + "rewards_train/rejected": 0.2597763240337372, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -35.624019622802734, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -18.17108726501465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.337402105331421, + "rewards_train/margins": -2.004668354988098, + "rewards_train/rejected": -1.3327337503433228, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -12.916885375976562, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -10.893089294433594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8166885375976562, + "rewards_train/margins": -0.18362957239151, + "rewards_train/rejected": -0.6330589652061462, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -22.900365829467773, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -23.625, + "logps_train/rejected": -36.86323928833008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1025365591049194, + "rewards_train/margins": 0.22128736972808838, + "rewards_train/rejected": -1.3238239288330078, + "step": 2190 + }, + { + "epoch": 0.61, + "logps_train/chosen": -7.160635948181152, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -27.700851440429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18393640220165253, + "rewards_train/margins": 2.6805839985609055, + "rewards_train/rejected": -2.496647596359253, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -23.467071533203125, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -12.375, + "logps_train/rejected": -45.259071350097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0592072010040283, + "rewards_train/margins": 2.2291998863220215, + "rewards_train/rejected": -3.28840708732605, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -35.2335205078125, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -40.07027816772461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2733520567417145, + "rewards_train/margins": 2.071175903081894, + "rewards_train/rejected": -2.3445279598236084, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -150.59036254882812, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -172.78228759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3590362071990967, + "rewards_train/margins": 5.219192743301392, + "rewards_train/rejected": -8.578228950500488, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -88.26406860351562, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -2.9375, + "logps_train/rejected": -12.315985679626465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8764069080352783, + "rewards_train/margins": -0.9385583400726318, + "rewards_train/rejected": -0.9378485679626465, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -67.04827117919922, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -113.79621887207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.929827094078064, + "rewards_train/margins": 1.5997947454452515, + "rewards_train/rejected": -3.5296218395233154, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.536369323730469, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -7.21875, + "logps_train/rejected": -17.579030990600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5864494442939758, + "rewards_train/margins": 0.4495787024497986, + "rewards_train/rejected": -1.0360281467437744, + "step": 2191 + }, + { + "epoch": 0.61, + "logps_train/chosen": -52.433555603027344, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -92.57681274414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.8566444516181946, + "rewards_train/margins": 2.464325726032257, + "rewards_train/rejected": -1.6076812744140625, + "step": 2191 + }, + { + "epoch": 0.61, + "learning_rate": 8.185182278962288e-08, + "loss": 0.5153, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -89.01675415039062, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -58.358516693115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9516754150390625, + "rewards_train/margins": 1.2841763496398926, + "rewards_train/rejected": -2.235851764678955, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -130.53469848632812, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -246.80160522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0034698247909546, + "rewards_train/margins": 7.2766910791397095, + "rewards_train/rejected": -8.280160903930664, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -197.67010498046875, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -199.6776580810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.217010498046875, + "rewards_train/margins": 2.4007558822631836, + "rewards_train/rejected": -9.617766380310059, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -112.03145599365234, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -120.84779357910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.253145694732666, + "rewards_train/margins": -0.018366336822509766, + "rewards_train/rejected": -3.2347793579101562, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -166.99217224121094, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -135.6149139404297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.6992173194885254, + "rewards_train/margins": -0.03772592544555664, + "rewards_train/rejected": -3.6614913940429688, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.206241607666016, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -68.07778930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4331241846084595, + "rewards_train/margins": 2.349654793739319, + "rewards_train/rejected": -3.7827789783477783, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -34.34839630126953, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -39.6160888671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.447339653968811, + "rewards_train/margins": 0.20176923274993896, + "rewards_train/rejected": -1.64910888671875, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -92.69373321533203, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -188.12396240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.069373369216919, + "rewards_train/margins": 6.643023252487183, + "rewards_train/rejected": -8.712396621704102, + "step": 2192 + }, + { + "epoch": 0.61, + "logps_train/chosen": -213.53359985351562, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -340.1650695800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.103360176086426, + "rewards_train/margins": 7.613146781921387, + "rewards_train/rejected": -16.716506958007812, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.54857063293457, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -34.727020263671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0454821586608887, + "rewards_train/margins": 0.9834699630737305, + "rewards_train/rejected": -3.028952121734619, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -143.00428771972656, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -204.11740112304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5004289150238037, + "rewards_train/margins": 6.411311388015747, + "rewards_train/rejected": -8.91174030303955, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -33.9615592956543, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -26.043306350708008, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4211559295654297, + "rewards_train/margins": -1.3043252229690552, + "rewards_train/rejected": -1.1168307065963745, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -19.14013671875, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -26.53655433654785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.176513671875, + "rewards_train/margins": 0.6646417379379272, + "rewards_train/rejected": -1.8411554098129272, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.708696365356445, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -40.92200469970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6708697080612183, + "rewards_train/margins": 0.9150809049606323, + "rewards_train/rejected": -2.5859506130218506, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.782936096191406, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -2.015625, + "logps_train/rejected": -11.576032638549805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4032936096191406, + "rewards_train/margins": -0.4472528100013733, + "rewards_train/rejected": -0.9560407996177673, + "step": 2193 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.410834312438965, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -17.744497299194336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16516657173633575, + "rewards_train/margins": 0.8771163374185562, + "rewards_train/rejected": -0.7119497656822205, + "step": 2193 + }, + { + "epoch": 0.61, + "learning_rate": 8.080677102285805e-08, + "loss": 0.3963, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -32.14319610595703, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -30.911361694335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.514319658279419, + "rewards_train/margins": -0.3481835126876831, + "rewards_train/rejected": -1.1661361455917358, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -123.08995056152344, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -179.28689575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4089951515197754, + "rewards_train/margins": 3.5696945190429688, + "rewards_train/rejected": -6.978689670562744, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -208.32601928710938, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -241.1244354248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.682601928710938, + "rewards_train/margins": 2.3298416137695312, + "rewards_train/rejected": -11.012443542480469, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -143.807861328125, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -186.98040771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5307862758636475, + "rewards_train/margins": 1.6672546863555908, + "rewards_train/rejected": -5.198040962219238, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -139.4652557373047, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -144.50099182128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4465255737304688, + "rewards_train/margins": 1.8535735607147217, + "rewards_train/rejected": -3.3000991344451904, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -1.6530847549438477, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -2.484375, + "logps_train/rejected": -28.559646606445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.056566525250673294, + "rewards_train/margins": 2.664093781262636, + "rewards_train/rejected": -2.607527256011963, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -9.657815933227539, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -1.1953125, + "logps_train/rejected": -9.696147918701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1595315933227539, + "rewards_train/margins": 0.6905519366264343, + "rewards_train/rejected": -0.8500835299491882, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -30.200302124023438, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -45.88548278808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4419052600860596, + "rewards_train/margins": 0.7091431617736816, + "rewards_train/rejected": -3.151048421859741, + "step": 2194 + }, + { + "epoch": 0.61, + "logps_train/chosen": -208.7803955078125, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -237.92340087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.978039741516113, + "rewards_train/margins": 0.21430015563964844, + "rewards_train/rejected": -9.192339897155762, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -19.29696273803711, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -34.791473388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9171962738037109, + "rewards_train/margins": 1.871326208114624, + "rewards_train/rejected": -2.788522481918335, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -7.0775251388549805, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -3.89973783493042, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5797474980354309, + "rewards_train/margins": 0.5290962792932987, + "rewards_train/rejected": 0.05065121874213219, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -5.427908897399902, + "logps_train/ref_chosen": -7.75, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -45.31186294555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.23220911622047424, + "rewards_train/margins": 2.0258953869342804, + "rewards_train/rejected": -1.7936862707138062, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -64.83247375488281, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -76.19527435302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.283247470855713, + "rewards_train/margins": 1.6612799167633057, + "rewards_train/rejected": -3.9445273876190186, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -168.35720825195312, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -211.42633056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.935720920562744, + "rewards_train/margins": 3.906912326812744, + "rewards_train/rejected": -7.842633247375488, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -108.39747619628906, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -185.79440307617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6397476196289062, + "rewards_train/margins": 6.0396928787231445, + "rewards_train/rejected": -6.679440498352051, + "step": 2195 + }, + { + "epoch": 0.61, + "logps_train/chosen": -43.445980072021484, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -45.0, + "logps_train/rejected": -95.41050720214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1945980787277222, + "rewards_train/margins": 3.846452832221985, + "rewards_train/rejected": -5.041050910949707, + "step": 2195 + }, + { + "epoch": 0.61, + "learning_rate": 7.976815263412961e-08, + "loss": 0.2334, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -33.35638427734375, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -60.73739242553711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.745013475418091, + "rewards_train/margins": 1.1662259101867676, + "rewards_train/rejected": -3.9112393856048584, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -59.1552619934082, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -59.10449981689453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.44052621722221375, + "rewards_train/margins": -0.005076229572296143, + "rewards_train/rejected": -0.4354499876499176, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -116.1978759765625, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -121.09811401367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.419787645339966, + "rewards_train/margins": 0.0400238037109375, + "rewards_train/rejected": -3.4598114490509033, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -29.907936096191406, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -64.8086166381836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9032936096191406, + "rewards_train/margins": 2.127568006515503, + "rewards_train/rejected": -3.0308616161346436, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -22.963088989257812, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -64.55626678466797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3838089108467102, + "rewards_train/margins": 5.662442862987518, + "rewards_train/rejected": -6.0462517738342285, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -172.1609649658203, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -174.92526245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6160964965820312, + "rewards_train/margins": 0.2764298915863037, + "rewards_train/rejected": -3.892526388168335, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -89.9044189453125, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -51.5, + "logps_train/rejected": -78.9361572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0904419422149658, + "rewards_train/margins": 1.6531739234924316, + "rewards_train/rejected": -2.7436158657073975, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -3.7002134323120117, + "logps_train/ref_chosen": -0.373046875, + "logps_train/ref_rejected": -2.296875, + "logps_train/rejected": -9.527575492858887, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3327166736125946, + "rewards_train/margins": 0.39035341143608093, + "rewards_train/rejected": -0.7230700850486755, + "step": 2196 + }, + { + "epoch": 0.61, + "logps_train/chosen": -53.292442321777344, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -114.3975601196289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.054244231432676315, + "rewards_train/margins": 2.2855117805302143, + "rewards_train/rejected": -2.3397560119628906, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -94.24763488769531, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -93.6263427734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.274763584136963, + "rewards_train/margins": -0.01212930679321289, + "rewards_train/rejected": -2.26263427734375, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -14.7052001953125, + "logps_train/ref_chosen": -3.0625, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -16.639179229736328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.164270043373108, + "rewards_train/margins": 0.1293354034423828, + "rewards_train/rejected": -1.2936054468154907, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -42.252540588378906, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -70.0970230102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3377541303634644, + "rewards_train/margins": 2.0469483137130737, + "rewards_train/rejected": -3.384702444076538, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -51.89154815673828, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -62.55061340332031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2641548216342926, + "rewards_train/margins": 4.865906804800034, + "rewards_train/rejected": -5.130061626434326, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -159.16839599609375, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -170.38980102539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0168397426605225, + "rewards_train/margins": 4.372140645980835, + "rewards_train/rejected": -6.388980388641357, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -19.294668197631836, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -69.6427001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.435716837644577, + "rewards_train/margins": 3.178553134202957, + "rewards_train/rejected": -3.614269971847534, + "step": 2197 + }, + { + "epoch": 0.61, + "logps_train/chosen": -24.278562545776367, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -35.47860336303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6403562426567078, + "rewards_train/margins": 1.7387540936470032, + "rewards_train/rejected": -2.379110336303711, + "step": 2197 + }, + { + "epoch": 0.61, + "learning_rate": 7.873597489266559e-08, + "loss": 0.299, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -33.72559356689453, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -25.75, + "logps_train/rejected": -46.044708251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5725593566894531, + "rewards_train/margins": 0.456911563873291, + "rewards_train/rejected": -2.029470920562744, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -60.100276947021484, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -51.75, + "logps_train/rejected": -91.59083557128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0100276470184326, + "rewards_train/margins": 1.9740560054779053, + "rewards_train/rejected": -3.984083652496338, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -0.2904024124145508, + "logps_train/ref_chosen": -0.9921875, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -16.49995231628418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07017850875854492, + "rewards_train/margins": 0.7201737761497498, + "rewards_train/rejected": -0.6499952673912048, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -49.40742874145508, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -30.561935424804688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1282429695129395, + "rewards_train/margins": -0.3220493793487549, + "rewards_train/rejected": -1.8061935901641846, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -26.723365783691406, + "logps_train/ref_chosen": -24.375, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -49.32852554321289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23483657836914062, + "rewards_train/margins": 4.176141262054443, + "rewards_train/rejected": -4.410977840423584, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -18.016897201538086, + "logps_train/ref_chosen": -3.34375, + "logps_train/ref_rejected": -0.5390625, + "logps_train/rejected": -5.766101360321045, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4673147201538086, + "rewards_train/margins": -0.9446108341217041, + "rewards_train/rejected": -0.5227038860321045, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -8.910164833068848, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -0.84765625, + "logps_train/rejected": -0.7908577919006348, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5316414833068848, + "rewards_train/margins": -0.5373213291168213, + "rewards_train/rejected": 0.0056798458099365234, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -131.48666381835938, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -192.69921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0986664295196533, + "rewards_train/margins": 5.171255350112915, + "rewards_train/rejected": -7.269921779632568, + "step": 2198 + }, + { + "epoch": 0.61, + "logps_train/chosen": -32.670265197753906, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -30.809289932250977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5107765197753906, + "rewards_train/margins": -0.12359738349914551, + "rewards_train/rejected": -2.387179136276245, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -24.126541137695312, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -17.259845733642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11265411227941513, + "rewards_train/margins": 0.8195804730057716, + "rewards_train/rejected": -0.9322345852851868, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -374.4278564453125, + "logps_train/ref_chosen": -314.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -172.08226013183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.04278564453125, + "rewards_train/margins": 2.3154401779174805, + "rewards_train/rejected": -8.35822582244873, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -22.59706687927246, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -38.979244232177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5597066879272461, + "rewards_train/margins": 0.5882177352905273, + "rewards_train/rejected": -1.1479244232177734, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -99.03871154785156, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -220.75523376464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.003871202468872, + "rewards_train/margins": 5.871652364730835, + "rewards_train/rejected": -8.875523567199707, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -34.48334884643555, + "logps_train/ref_chosen": -7.375, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -101.50856018066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7108349800109863, + "rewards_train/margins": 1.2650210857391357, + "rewards_train/rejected": -3.975856065750122, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -28.07537078857422, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -40.218360900878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4372246265411377, + "rewards_train/margins": 1.3408615589141846, + "rewards_train/rejected": -3.7780861854553223, + "step": 2199 + }, + { + "epoch": 0.61, + "logps_train/chosen": -137.0568084716797, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -187.0, + "logps_train/rejected": -224.17800903320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4056808948516846, + "rewards_train/margins": 0.3121199607849121, + "rewards_train/rejected": -3.7178008556365967, + "step": 2199 + }, + { + "epoch": 0.61, + "learning_rate": 7.771024502261525e-08, + "loss": 0.429, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -130.81964111328125, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -212.9499053955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5319641828536987, + "rewards_train/margins": 3.9630266427993774, + "rewards_train/rejected": -5.494990825653076, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -158.23883056640625, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -193.40255737304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.323883056640625, + "rewards_train/margins": 3.816372871398926, + "rewards_train/rejected": -10.14025592803955, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -57.15155029296875, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -1.2421875, + "logps_train/rejected": -12.97906494140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8901550769805908, + "rewards_train/margins": -0.7164672613143921, + "rewards_train/rejected": -1.1736878156661987, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -36.17887878417969, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -47.830169677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.167887881398201, + "rewards_train/margins": 3.2713790386915207, + "rewards_train/rejected": -3.4392669200897217, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -171.36465454101562, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -112.49324035644531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.636465549468994, + "rewards_train/margins": -0.9871413707733154, + "rewards_train/rejected": -2.6493241786956787, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -15.948451042175293, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -14.018763542175293, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.15734510123729706, + "rewards_train/margins": -0.005468741059303284, + "rewards_train/rejected": -0.15187636017799377, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -51.784671783447266, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -109.77583312988281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5159671306610107, + "rewards_train/margins": 1.7616164684295654, + "rewards_train/rejected": -4.277583599090576, + "step": 2200 + }, + { + "epoch": 0.61, + "logps_train/chosen": -116.61983489990234, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -112.1629409790039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.038016509264707565, + "rewards_train/margins": 1.804310631006956, + "rewards_train/rejected": -1.7662941217422485, + "step": 2200 + }, + { + "epoch": 0.62, + "logps_train/chosen": -18.906949996948242, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -40.17982864379883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1094449758529663, + "rewards_train/margins": 0.5710378885269165, + "rewards_train/rejected": -1.6804828643798828, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -93.48020935058594, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -106.17584991455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7480209469795227, + "rewards_train/margins": 1.2195640206336975, + "rewards_train/rejected": -1.9675849676132202, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -55.45951461791992, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -48.58409881591797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.1209516525268555, + "rewards_train/margins": -1.0750417709350586, + "rewards_train/rejected": -3.045909881591797, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -141.99563598632812, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -180.21856689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8495635986328125, + "rewards_train/margins": 3.422293186187744, + "rewards_train/rejected": -5.271856784820557, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -19.38786506652832, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -55.6795654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5262865424156189, + "rewards_train/margins": 2.9666700959205627, + "rewards_train/rejected": -3.4929566383361816, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -51.90816879272461, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -74.68370819091797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.640816867351532, + "rewards_train/margins": 3.0025539994239807, + "rewards_train/rejected": -3.6433708667755127, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -107.28984832763672, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -132.5084991455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.478984832763672, + "rewards_train/margins": 0.021865129470825195, + "rewards_train/rejected": -3.500849962234497, + "step": 2201 + }, + { + "epoch": 0.62, + "logps_train/chosen": -336.0244140625, + "logps_train/ref_chosen": -196.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -326.5381164550781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -14.00244140625, + "rewards_train/margins": -0.3486299514770508, + "rewards_train/rejected": -13.65381145477295, + "step": 2201 + }, + { + "epoch": 0.62, + "learning_rate": 7.669097020300064e-08, + "loss": 0.4548, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -65.73469543457031, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -55.07689666748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1265304535627365, + "rewards_train/margins": 0.5592201203107834, + "rewards_train/rejected": -0.4326896667480469, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -151.5237274169922, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -224.42921447753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.852372884750366, + "rewards_train/margins": 9.19054913520813, + "rewards_train/rejected": -12.042922019958496, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -13.030680656433105, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -9.845407485961914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6593180894851685, + "rewards_train/margins": -0.026339828968048096, + "rewards_train/rejected": -0.6329782605171204, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -1.7518291473388672, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -14.875, + "logps_train/rejected": -20.61119842529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16544209420681, + "rewards_train/margins": 0.7390619367361069, + "rewards_train/rejected": -0.5736198425292969, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -23.426250457763672, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -28.5, + "logps_train/rejected": -41.47711181640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.330125093460083, + "rewards_train/margins": -0.03241384029388428, + "rewards_train/rejected": -1.2977112531661987, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -19.89432716369629, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -35.533416748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1394327878952026, + "rewards_train/margins": 1.5451589822769165, + "rewards_train/rejected": -2.684591770172119, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -11.738011360168457, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -23.60423469543457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9777073860168457, + "rewards_train/margins": -0.22978389263153076, + "rewards_train/rejected": -0.7479234933853149, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -28.83922576904297, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -23.84847640991211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.321422576904297, + "rewards_train/margins": -0.46782493591308594, + "rewards_train/rejected": -1.853597640991211, + "step": 2202 + }, + { + "epoch": 0.62, + "logps_train/chosen": -15.972908020019531, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -9.299015998840332, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.33479079604148865, + "rewards_train/margins": -0.14863918721675873, + "rewards_train/rejected": -0.18615160882472992, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -122.10722351074219, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -170.47068786621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6607223749160767, + "rewards_train/margins": 3.386346697807312, + "rewards_train/rejected": -4.047069072723389, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -262.9210205078125, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -180.55245971679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.392102241516113, + "rewards_train/margins": -1.9368562698364258, + "rewards_train/rejected": -7.4552459716796875, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -7.9073638916015625, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -30.84112548828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1969863921403885, + "rewards_train/margins": 1.8246262520551682, + "rewards_train/rejected": -2.0216126441955566, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -175.20553588867188, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -124.37667083740234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.5205535888671875, + "rewards_train/margins": -0.18288660049438477, + "rewards_train/rejected": -4.337666988372803, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -64.26226806640625, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -43.19703674316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02622680738568306, + "rewards_train/margins": 3.8091020099818707, + "rewards_train/rejected": -3.8353288173675537, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -32.65205764770508, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -32.5, + "logps_train/rejected": -73.64289855957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0027058124542236, + "rewards_train/margins": 3.111583948135376, + "rewards_train/rejected": -4.1142897605896, + "step": 2203 + }, + { + "epoch": 0.62, + "logps_train/chosen": -36.556968688964844, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -38.1472282409668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3681968450546265, + "rewards_train/margins": 0.9277759790420532, + "rewards_train/rejected": -2.2959728240966797, + "step": 2203 + }, + { + "epoch": 0.62, + "learning_rate": 7.567815756766471e-08, + "loss": 0.5272, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -103.95037841796875, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -206.12979125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.395037889480591, + "rewards_train/margins": 7.117941617965698, + "rewards_train/rejected": -10.512979507446289, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -3.6205191612243652, + "logps_train/ref_chosen": -0.76953125, + "logps_train/ref_rejected": -0.76953125, + "logps_train/rejected": -3.7415826320648193, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2850987911224365, + "rewards_train/margins": 0.012106359004974365, + "rewards_train/rejected": -0.2972051501274109, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -139.39712524414062, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -227.125244140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9397125244140625, + "rewards_train/margins": 7.372812271118164, + "rewards_train/rejected": -8.312524795532227, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -5.151316165924072, + "logps_train/ref_chosen": -0.4921875, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -16.120758056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4659128785133362, + "rewards_train/margins": 0.02116292715072632, + "rewards_train/rejected": -0.4870758056640625, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -171.9603271484375, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -178.56021118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.146032810211182, + "rewards_train/margins": 0.1599884033203125, + "rewards_train/rejected": -7.306021213531494, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -117.60721588134766, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -162.52313232421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.91072154045105, + "rewards_train/margins": 2.5915915966033936, + "rewards_train/rejected": -5.502313137054443, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -45.90569305419922, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -97.75303649902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8030693531036377, + "rewards_train/margins": 1.2222344875335693, + "rewards_train/rejected": -4.025303840637207, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -5.1413679122924805, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -0.490234375, + "logps_train/rejected": -17.562740325927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17961321771144867, + "rewards_train/margins": 1.886863812804222, + "rewards_train/rejected": -1.7072505950927734, + "step": 2204 + }, + { + "epoch": 0.62, + "logps_train/chosen": -167.61203002929688, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -181.70199584960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.3112030029296875, + "rewards_train/margins": 1.4589967727661133, + "rewards_train/rejected": -6.770199775695801, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -111.01774597167969, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -167.45333862304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2017745971679688, + "rewards_train/margins": 2.3435592651367188, + "rewards_train/rejected": -4.5453338623046875, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -130.71400451660156, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -115.76983642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9214004278182983, + "rewards_train/margins": 0.45558321475982666, + "rewards_train/rejected": -2.376983642578125, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -44.30864334106445, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -47.21800994873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04413566738367081, + "rewards_train/margins": 3.2346867099404335, + "rewards_train/rejected": -3.1905510425567627, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -40.63237762451172, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -20.897220611572266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.138237714767456, + "rewards_train/margins": -0.7078906297683716, + "rewards_train/rejected": -1.4303470849990845, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -171.53565979003906, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -125.16580200195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1535661220550537, + "rewards_train/margins": 0.8630142211914062, + "rewards_train/rejected": -3.01658034324646, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -28.143474578857422, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -48.24837875366211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.001847505569458, + "rewards_train/margins": 2.487053155899048, + "rewards_train/rejected": -4.488900661468506, + "step": 2205 + }, + { + "epoch": 0.62, + "logps_train/chosen": -26.78557014465332, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -65.63774108886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5285570025444031, + "rewards_train/margins": 2.947717249393463, + "rewards_train/rejected": -3.476274251937866, + "step": 2205 + }, + { + "epoch": 0.62, + "learning_rate": 7.467181420522173e-08, + "loss": 0.305, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -177.0350341796875, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -223.82815551757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.65350341796875, + "rewards_train/margins": 3.1793127059936523, + "rewards_train/rejected": -9.832816123962402, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -153.1304931640625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -162.26983642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.06304931640625, + "rewards_train/margins": 1.7139344215393066, + "rewards_train/rejected": -4.776983737945557, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -67.135498046875, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -136.6441192626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8885498046875, + "rewards_train/margins": 1.775862216949463, + "rewards_train/rejected": -2.664412021636963, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -35.719879150390625, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -39.72826385498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4219878911972046, + "rewards_train/margins": 1.757088541984558, + "rewards_train/rejected": -3.1790764331817627, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -153.51397705078125, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -153.1678009033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.151397705078125, + "rewards_train/margins": 1.8653826713562012, + "rewards_train/rejected": -4.016780376434326, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -152.29757690429688, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -162.25233459472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0297577381134033, + "rewards_train/margins": 2.4454758167266846, + "rewards_train/rejected": -4.475233554840088, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -14.077909469604492, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -64.36465454101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2952909469604492, + "rewards_train/margins": 3.278674602508545, + "rewards_train/rejected": -3.573965549468994, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -130.73886108398438, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -123.64932250976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.523886203765869, + "rewards_train/margins": -0.05895400047302246, + "rewards_train/rejected": -2.4649322032928467, + "step": 2206 + }, + { + "epoch": 0.62, + "logps_train/chosen": -24.879009246826172, + "logps_train/ref_chosen": -1.7265625, + "logps_train/ref_rejected": -1.7265625, + "logps_train/rejected": -25.378662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.315244674682617, + "rewards_train/margins": 0.04996538162231445, + "rewards_train/rejected": -2.3652100563049316, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -2.9831631183624268, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -23.53697967529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.020191311836242676, + "rewards_train/margins": 1.39600670337677, + "rewards_train/rejected": -1.4161980152130127, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -15.20717716217041, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -56.42141342163086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.123842716217041, + "rewards_train/margins": 3.1807985305786133, + "rewards_train/rejected": -4.304641246795654, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -22.848905563354492, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -17.333070755004883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8723905682563782, + "rewards_train/margins": -0.07658350467681885, + "rewards_train/rejected": -0.7958070635795593, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -179.14230346679688, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -125.09911346435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21423034369945526, + "rewards_train/margins": 0.9956810027360916, + "rewards_train/rejected": -1.2099113464355469, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -37.95460510253906, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -23.584136962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9642105102539062, + "rewards_train/margins": -1.46204674243927, + "rewards_train/rejected": -1.5021637678146362, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -57.72476577758789, + "logps_train/ref_chosen": -38.25, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -15.76694393157959, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9474766254425049, + "rewards_train/margins": -1.233282208442688, + "rewards_train/rejected": -0.7141944169998169, + "step": 2207 + }, + { + "epoch": 0.62, + "logps_train/chosen": -95.71974182128906, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -44.5, + "logps_train/rejected": -63.54238510131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1719741821289062, + "rewards_train/margins": 0.7322643995285034, + "rewards_train/rejected": -1.9042385816574097, + "step": 2207 + }, + { + "epoch": 0.62, + "learning_rate": 7.367194715900859e-08, + "loss": 0.4399, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -85.8143539428711, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -27.375, + "logps_train/rejected": -55.81025314331055, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.406435489654541, + "rewards_train/margins": -0.5629100799560547, + "rewards_train/rejected": -2.8435254096984863, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -110.65185546875, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -185.94317626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9151855707168579, + "rewards_train/margins": 5.179132342338562, + "rewards_train/rejected": -6.09431791305542, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -110.3738784790039, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -102.88371276855469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2873878479003906, + "rewards_train/margins": -0.19901657104492188, + "rewards_train/rejected": -3.0883712768554688, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -91.38121032714844, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -206.75253295898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3381210565567017, + "rewards_train/margins": 7.587132811546326, + "rewards_train/rejected": -8.925253868103027, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -154.514892578125, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -192.63394165039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4014892578125, + "rewards_train/margins": 4.161904811859131, + "rewards_train/rejected": -7.563394069671631, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -50.85976791381836, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -78.11658477783203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6109768152236938, + "rewards_train/margins": 1.2256816625595093, + "rewards_train/rejected": -1.8366584777832031, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -228.3451690673828, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -212.09872436523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.334517478942871, + "rewards_train/margins": 0.8753547668457031, + "rewards_train/rejected": -9.209872245788574, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -121.32115936279297, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -154.39328002929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.582115888595581, + "rewards_train/margins": 1.5572121143341064, + "rewards_train/rejected": -4.1393280029296875, + "step": 2208 + }, + { + "epoch": 0.62, + "logps_train/chosen": -128.0853271484375, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -195.65597534179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.258533000946045, + "rewards_train/margins": 4.807064533233643, + "rewards_train/rejected": -9.065597534179688, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -47.32521057128906, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -85.58590698242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.182521104812622, + "rewards_train/margins": 0.17606961727142334, + "rewards_train/rejected": -1.3585907220840454, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -20.860275268554688, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -29.62537956237793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6641525030136108, + "rewards_train/margins": 0.5077604055404663, + "rewards_train/rejected": -2.171912908554077, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -26.737117767333984, + "logps_train/ref_chosen": -24.875, + "logps_train/ref_rejected": -18.5, + "logps_train/rejected": -24.520309448242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18621177971363068, + "rewards_train/margins": 0.4158191531896591, + "rewards_train/rejected": -0.6020309329032898, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.14055061340332, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -10.877751350402832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10780506581068039, + "rewards_train/margins": 0.4018451049923897, + "rewards_train/rejected": -0.5096501708030701, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -123.1595458984375, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -119.10212707519531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4659546613693237, + "rewards_train/margins": -0.45574188232421875, + "rewards_train/rejected": -1.010212779045105, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -148.01136779785156, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -147.845458984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.251136779785156, + "rewards_train/margins": -0.016590595245361328, + "rewards_train/rejected": -5.234546184539795, + "step": 2209 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.653499603271484, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -34.67206573486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21222496032714844, + "rewards_train/margins": 1.3799816370010376, + "rewards_train/rejected": -1.592206597328186, + "step": 2209 + }, + { + "epoch": 0.62, + "learning_rate": 7.26785634270346e-08, + "loss": 0.4131, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -153.76181030273438, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -188.29074096679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4761810302734375, + "rewards_train/margins": 1.7528929710388184, + "rewards_train/rejected": -4.229074001312256, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -207.7069549560547, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -148.2711181640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.570695400238037, + "rewards_train/margins": -2.593583583831787, + "rewards_train/rejected": -3.97711181640625, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.582582473754883, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -14.1875, + "logps_train/rejected": -14.731082916259766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07388325035572052, + "rewards_train/margins": -0.019524957984685898, + "rewards_train/rejected": -0.05435829237103462, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -228.5714111328125, + "logps_train/ref_chosen": -193.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -134.20608520507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.557141065597534, + "rewards_train/margins": -1.586532473564148, + "rewards_train/rejected": -1.9706085920333862, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -167.2093505859375, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -195.580322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.52093505859375, + "rewards_train/margins": 2.4370970726013184, + "rewards_train/rejected": -5.958032131195068, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -77.828857421875, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -149.1721649169922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0828857421875, + "rewards_train/margins": 3.934330940246582, + "rewards_train/rejected": -5.017216682434082, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -3.685839891433716, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -1.0078125, + "logps_train/rejected": -25.647424697875977, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24280273914337158, + "rewards_train/margins": 2.2211586236953735, + "rewards_train/rejected": -2.463961362838745, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -250.81735229492188, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -221.41162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.181735038757324, + "rewards_train/margins": 0.7594270706176758, + "rewards_train/rejected": -10.941162109375, + "step": 2210 + }, + { + "epoch": 0.62, + "logps_train/chosen": -37.73833084106445, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -46.50551223754883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4863331317901611, + "rewards_train/margins": 1.8579680919647217, + "rewards_train/rejected": -3.344301223754883, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -6.617990016937256, + "logps_train/ref_chosen": -1.875, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -25.101253509521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47429901361465454, + "rewards_train/margins": 0.8108263611793518, + "rewards_train/rejected": -1.2851253747940063, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -47.26031494140625, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -48.467105865478516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5385315418243408, + "rewards_train/margins": -0.14182090759277344, + "rewards_train/rejected": -1.3967106342315674, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -105.363525390625, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -200.5352783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.286352515220642, + "rewards_train/margins": 5.6671754121780396, + "rewards_train/rejected": -6.953527927398682, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.065183639526367, + "logps_train/ref_chosen": -0.78125, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -11.499300003051758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7283933758735657, + "rewards_train/margins": 0.018411636352539062, + "rewards_train/rejected": -0.7468050122261047, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -103.66079711914062, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -177.33050537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.116079807281494, + "rewards_train/margins": 3.866971015930176, + "rewards_train/rejected": -6.98305082321167, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -33.54133987426758, + "logps_train/ref_chosen": -18.875, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -40.8526611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4666340351104736, + "rewards_train/margins": 1.6998820304870605, + "rewards_train/rejected": -3.166516065597534, + "step": 2211 + }, + { + "epoch": 0.62, + "logps_train/chosen": -29.15641212463379, + "logps_train/ref_chosen": -4.25, + "logps_train/ref_rejected": -0.77734375, + "logps_train/rejected": -26.436725616455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4906413555145264, + "rewards_train/margins": 0.07529687881469727, + "rewards_train/rejected": -2.5659382343292236, + "step": 2211 + }, + { + "epoch": 0.62, + "learning_rate": 7.169166996193254e-08, + "loss": 0.5439, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -93.52679443359375, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -42.79562759399414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4526795148849487, + "rewards_train/margins": -0.03561675548553467, + "rewards_train/rejected": -1.417062759399414, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -44.27710723876953, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -29.588546752929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.602710723876953, + "rewards_train/margins": -0.3876059055328369, + "rewards_train/rejected": -2.215104818344116, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -28.481735229492188, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -41.45285415649414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5981735587120056, + "rewards_train/margins": 1.2096118330955505, + "rewards_train/rejected": -1.8077853918075562, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -7.727426528930664, + "logps_train/ref_chosen": -0.69921875, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -27.603517532348633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7028207778930664, + "rewards_train/margins": 1.585655927658081, + "rewards_train/rejected": -2.2884767055511475, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -76.74606323242188, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -83.44493865966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9996063709259033, + "rewards_train/margins": -0.15511250495910645, + "rewards_train/rejected": -1.8444938659667969, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -130.35452270507812, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -158.67869567871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1854522228240967, + "rewards_train/margins": 0.48241734504699707, + "rewards_train/rejected": -3.6678695678710938, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -7.519789695739746, + "logps_train/ref_chosen": -1.0234375, + "logps_train/ref_rejected": -1.5546875, + "logps_train/rejected": -7.597146034240723, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6496352553367615, + "rewards_train/margins": -0.045389413833618164, + "rewards_train/rejected": -0.6042458415031433, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -143.1074981689453, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -117.11408233642578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.3107500076293945, + "rewards_train/margins": -2.5493416786193848, + "rewards_train/rejected": -4.76140832901001, + "step": 2212 + }, + { + "epoch": 0.62, + "logps_train/chosen": -80.38996887207031, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -136.91259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4889968633651733, + "rewards_train/margins": 2.8022629022598267, + "rewards_train/rejected": -4.291259765625, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -219.2813720703125, + "logps_train/ref_chosen": -182.0, + "logps_train/ref_rejected": -205.0, + "logps_train/rejected": -267.94891357421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.728137254714966, + "rewards_train/margins": 2.566754102706909, + "rewards_train/rejected": -6.294891357421875, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.905378341674805, + "logps_train/ref_chosen": -2.828125, + "logps_train/ref_rejected": -2.296875, + "logps_train/rejected": -6.538817882537842, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6077253222465515, + "rewards_train/margins": -0.1835310161113739, + "rewards_train/rejected": -0.4241943061351776, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -116.69078063964844, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -122.78934478759766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.269078016281128, + "rewards_train/margins": 0.2598564624786377, + "rewards_train/rejected": -3.5289344787597656, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -15.744665145874023, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -31.876434326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3650915622711182, + "rewards_train/margins": 1.0694270133972168, + "rewards_train/rejected": -2.434518575668335, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -39.82520294189453, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -10.6875, + "logps_train/rejected": -18.70768165588379, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0075204372406006, + "rewards_train/margins": -2.2055022716522217, + "rewards_train/rejected": -0.8020181655883789, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -89.53334045410156, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -127.42852020263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2533340454101562, + "rewards_train/margins": 2.6895179748535156, + "rewards_train/rejected": -3.942852020263672, + "step": 2213 + }, + { + "epoch": 0.62, + "logps_train/chosen": -61.969482421875, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -51.25, + "logps_train/rejected": -96.0409164428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4469482898712158, + "rewards_train/margins": 3.0321433544158936, + "rewards_train/rejected": -4.479091644287109, + "step": 2213 + }, + { + "epoch": 0.62, + "learning_rate": 7.071127367091023e-08, + "loss": 0.679, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -23.88588523864746, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -48.57623291015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0948385000228882, + "rewards_train/margins": 1.925284743309021, + "rewards_train/rejected": -3.020123243331909, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -123.76451110839844, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -105.73396301269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0764511823654175, + "rewards_train/margins": 1.8469451665878296, + "rewards_train/rejected": -2.923396348953247, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -109.59176635742188, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -185.90411376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.009176731109619, + "rewards_train/margins": 4.631234645843506, + "rewards_train/rejected": -6.640411376953125, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -1.3731931447982788, + "logps_train/ref_chosen": -0.59765625, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -9.706762313842773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07755368947982788, + "rewards_train/margins": 0.37437254190444946, + "rewards_train/rejected": -0.45192623138427734, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -106.3836898803711, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -192.457763671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.588369131088257, + "rewards_train/margins": 3.857407331466675, + "rewards_train/rejected": -6.445776462554932, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -14.102131843566895, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -51.47412872314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.48521319031715393, + "rewards_train/margins": 2.4746998250484467, + "rewards_train/rejected": -2.9599130153656006, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -10.782297134399414, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -54.269344329833984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0967702865600586, + "rewards_train/margins": 2.148704767227173, + "rewards_train/rejected": -2.0519344806671143, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -18.303213119506836, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -17.96832275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29282131791114807, + "rewards_train/margins": 0.5790109932422638, + "rewards_train/rejected": -0.8718323111534119, + "step": 2214 + }, + { + "epoch": 0.62, + "logps_train/chosen": -71.24633026123047, + "logps_train/ref_chosen": -22.125, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -71.17646789550781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.91213321685791, + "rewards_train/margins": -0.006986141204833984, + "rewards_train/rejected": -4.905147075653076, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -5.895502090454102, + "logps_train/ref_chosen": -0.78125, + "logps_train/ref_rejected": -10.875, + "logps_train/rejected": -16.30453872680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5114251971244812, + "rewards_train/margins": 0.03152871131896973, + "rewards_train/rejected": -0.5429539084434509, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -145.32473754882812, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -249.80938720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.182473659515381, + "rewards_train/margins": 6.198464870452881, + "rewards_train/rejected": -10.380938529968262, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -21.332008361816406, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -1.203125, + "logps_train/rejected": -5.251081466674805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9207008481025696, + "rewards_train/margins": -0.5159052014350891, + "rewards_train/rejected": -0.40479564666748047, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -188.54119873046875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -235.5601806640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.554120063781738, + "rewards_train/margins": 4.601898193359375, + "rewards_train/rejected": -10.156018257141113, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -32.44951248168945, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -26.550823211669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6199512481689453, + "rewards_train/margins": 1.6320061683654785, + "rewards_train/rejected": -2.251957416534424, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -118.97419738769531, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -204.70510864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8974196910858154, + "rewards_train/margins": 7.72309136390686, + "rewards_train/rejected": -11.620511054992676, + "step": 2215 + }, + { + "epoch": 0.62, + "logps_train/chosen": -150.84457397460938, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -148.846435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.634457588195801, + "rewards_train/margins": 0.6501860618591309, + "rewards_train/rejected": -5.284643650054932, + "step": 2215 + }, + { + "epoch": 0.62, + "learning_rate": 6.97373814157023e-08, + "loss": 0.2776, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -137.4950408935547, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -232.0, + "logps_train/rejected": -325.98028564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.599504232406616, + "rewards_train/margins": 5.7985241413116455, + "rewards_train/rejected": -9.398028373718262, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -94.94422912597656, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -29.0, + "logps_train/rejected": -51.85186767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6444229483604431, + "rewards_train/margins": 1.6407638192176819, + "rewards_train/rejected": -2.285186767578125, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -14.211706161499023, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -126.39599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9242956042289734, + "rewards_train/margins": 3.8153041005134583, + "rewards_train/rejected": -4.739599704742432, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -126.36076354980469, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -185.1238555908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6360763311386108, + "rewards_train/margins": 4.67630922794342, + "rewards_train/rejected": -6.312385559082031, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -15.182601928710938, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -48.9559326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3682602047920227, + "rewards_train/margins": 3.1648330092430115, + "rewards_train/rejected": -3.533093214035034, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -13.421937942504883, + "logps_train/ref_chosen": -3.015625, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -40.408748626708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0406312942504883, + "rewards_train/margins": 2.2002437114715576, + "rewards_train/rejected": -3.240875005722046, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -3.8215556144714355, + "logps_train/ref_chosen": -1.734375, + "logps_train/ref_rejected": -0.61328125, + "logps_train/rejected": -1.7756097316741943, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20871806144714355, + "rewards_train/margins": -0.092485211789608, + "rewards_train/rejected": -0.11623284965753555, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -144.12258911132812, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -170.38758850097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01225891150534153, + "rewards_train/margins": 5.076500033959746, + "rewards_train/rejected": -5.088758945465088, + "step": 2216 + }, + { + "epoch": 0.62, + "logps_train/chosen": -246.5347900390625, + "logps_train/ref_chosen": -161.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -255.22386169433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.553479194641113, + "rewards_train/margins": 2.6689071655273438, + "rewards_train/rejected": -11.222386360168457, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -44.44154357910156, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -0.97265625, + "logps_train/rejected": -7.379404067993164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.481654405593872, + "rewards_train/margins": -1.8409796357154846, + "rewards_train/rejected": -0.6406747698783875, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -70.26276397705078, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -70.52149963378906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8762764930725098, + "rewards_train/margins": -0.024126529693603516, + "rewards_train/rejected": -3.8521499633789062, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -138.111572265625, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -141.7430419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0611572265625, + "rewards_train/margins": 0.46314704418182373, + "rewards_train/rejected": -1.5243042707443237, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -114.02013397216797, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -224.8853302001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.552013397216797, + "rewards_train/margins": 2.2365198135375977, + "rewards_train/rejected": -5.7885332107543945, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -5.845939636230469, + "logps_train/ref_chosen": -0.85546875, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -13.954036712646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.49904710054397583, + "rewards_train/margins": 0.3088566064834595, + "rewards_train/rejected": -0.8079037070274353, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -90.7274398803711, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -88.88581085205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2227439880371094, + "rewards_train/margins": 3.1283373832702637, + "rewards_train/rejected": -6.351081371307373, + "step": 2217 + }, + { + "epoch": 0.62, + "logps_train/chosen": -41.5400390625, + "logps_train/ref_chosen": -13.875, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -35.843414306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7665040493011475, + "rewards_train/margins": 0.6068999767303467, + "rewards_train/rejected": -3.373404026031494, + "step": 2217 + }, + { + "epoch": 0.62, + "learning_rate": 6.877000001252208e-08, + "loss": 0.3429, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -200.8548583984375, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -173.70130920410156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0854859352111816, + "rewards_train/margins": -1.9153549671173096, + "rewards_train/rejected": -1.170130968093872, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -208.58761596679688, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -249.13206481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.658761739730835, + "rewards_train/margins": 3.954444646835327, + "rewards_train/rejected": -7.613206386566162, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -116.78996276855469, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -142.35232543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.328996419906616, + "rewards_train/margins": 1.356236219406128, + "rewards_train/rejected": -4.685232639312744, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -14.71355152130127, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -22.047428131103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9494801759719849, + "rewards_train/margins": 0.6271376609802246, + "rewards_train/rejected": -1.5766178369522095, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -66.89654541015625, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -123.83822631835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5146546363830566, + "rewards_train/margins": 2.419167995452881, + "rewards_train/rejected": -4.9338226318359375, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -214.17262268066406, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -201.84193420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.117262363433838, + "rewards_train/margins": 3.4669313430786133, + "rewards_train/rejected": -6.584193706512451, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -18.249454498291016, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -12.834968566894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5061954855918884, + "rewards_train/margins": 0.47573888301849365, + "rewards_train/rejected": -0.9819343686103821, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -90.24503326416016, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -214.15713500976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5745033621788025, + "rewards_train/margins": 7.841209948062897, + "rewards_train/rejected": -8.4157133102417, + "step": 2218 + }, + { + "epoch": 0.62, + "logps_train/chosen": -191.53884887695312, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -265.75714111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.1538848876953125, + "rewards_train/margins": 0.22182941436767578, + "rewards_train/rejected": -7.375714302062988, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -62.68778610229492, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -78.03990936279297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.19377863407135, + "rewards_train/margins": -0.33978766202926636, + "rewards_train/rejected": -0.8539909720420837, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -5.161284446716309, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -14.484067916870117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2786284387111664, + "rewards_train/margins": 0.8916533291339874, + "rewards_train/rejected": -1.1702817678451538, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -9.04332160949707, + "logps_train/ref_chosen": -4.59375, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -22.25169563293457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4449571669101715, + "rewards_train/margins": 1.1458373963832855, + "rewards_train/rejected": -1.590794563293457, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -135.44509887695312, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -204.1903076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4945099353790283, + "rewards_train/margins": 6.374521017074585, + "rewards_train/rejected": -8.869030952453613, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -131.62876892089844, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -215.0107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3628768920898438, + "rewards_train/margins": 7.938197135925293, + "rewards_train/rejected": -11.301074028015137, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -9.94257640838623, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -35.15077590942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7880076766014099, + "rewards_train/margins": 1.7833200097084045, + "rewards_train/rejected": -2.5713276863098145, + "step": 2219 + }, + { + "epoch": 0.62, + "logps_train/chosen": -177.59719848632812, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -230.09628295898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.809720039367676, + "rewards_train/margins": 3.7999086380004883, + "rewards_train/rejected": -9.609628677368164, + "step": 2219 + }, + { + "epoch": 0.62, + "learning_rate": 6.780913623201345e-08, + "loss": 0.3496, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -157.34344482421875, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -132.29769897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3343446254730225, + "rewards_train/margins": 0.44542527198791504, + "rewards_train/rejected": -2.7797698974609375, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -19.110172271728516, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -40.27101135253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5891422033309937, + "rewards_train/margins": 1.2317088842391968, + "rewards_train/rejected": -2.8208510875701904, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -31.06545066833496, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -93.71632385253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.394045114517212, + "rewards_train/margins": 4.077587366104126, + "rewards_train/rejected": -5.471632480621338, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -14.419719696044922, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -30.938398361206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8357219696044922, + "rewards_train/margins": 1.508117914199829, + "rewards_train/rejected": -2.3438398838043213, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -125.01670837402344, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -150.75115966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7016708850860596, + "rewards_train/margins": 4.373445272445679, + "rewards_train/rejected": -7.075116157531738, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -140.77447509765625, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -168.87672424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07744751125574112, + "rewards_train/margins": 4.6602248176932335, + "rewards_train/rejected": -4.737672328948975, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -215.5396270751953, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -347.4439697265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.6539626121521, + "rewards_train/margins": 10.890434741973877, + "rewards_train/rejected": -16.544397354125977, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -140.51852416992188, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -154.25393676757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.251852512359619, + "rewards_train/margins": 0.223541259765625, + "rewards_train/rejected": -5.475393772125244, + "step": 2220 + }, + { + "epoch": 0.62, + "logps_train/chosen": -33.598472595214844, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -26.945749282836914, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7348473072052002, + "rewards_train/margins": -0.596522331237793, + "rewards_train/rejected": -1.1383249759674072, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -128.5128173828125, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -229.65234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.101281642913818, + "rewards_train/margins": 6.1639533042907715, + "rewards_train/rejected": -12.26523494720459, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -5.906369209289551, + "logps_train/ref_chosen": -3.96875, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -31.900156021118164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1937619298696518, + "rewards_train/margins": 2.140003815293312, + "rewards_train/rejected": -2.333765745162964, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -182.961669921875, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -175.42605590820312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7961671352386475, + "rewards_train/margins": -0.25356149673461914, + "rewards_train/rejected": -3.5426056385040283, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -94.29930877685547, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -163.39105224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6299309730529785, + "rewards_train/margins": 4.009174346923828, + "rewards_train/rejected": -6.639105319976807, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -223.09097290039062, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -228.029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.5090970993042, + "rewards_train/margins": 0.6938323974609375, + "rewards_train/rejected": -10.202929496765137, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -16.451765060424805, + "logps_train/ref_chosen": -2.21875, + "logps_train/ref_rejected": -3.875, + "logps_train/rejected": -38.03190612792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4233015775680542, + "rewards_train/margins": 1.9923890829086304, + "rewards_train/rejected": -3.4156906604766846, + "step": 2221 + }, + { + "epoch": 0.62, + "logps_train/chosen": -73.02920532226562, + "logps_train/ref_chosen": -52.75, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -137.39854431152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0279204845428467, + "rewards_train/margins": 3.2119338512420654, + "rewards_train/rejected": -5.239854335784912, + "step": 2221 + }, + { + "epoch": 0.62, + "learning_rate": 6.685479679920458e-08, + "loss": 0.2591, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -68.66693115234375, + "logps_train/ref_chosen": -53.75, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -51.58644485473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.491693139076233, + "rewards_train/margins": 1.3669513463974, + "rewards_train/rejected": -2.858644485473633, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -63.905818939208984, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -124.72443389892578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0905818939208984, + "rewards_train/margins": 2.631861448287964, + "rewards_train/rejected": -3.7224433422088623, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -27.115121841430664, + "logps_train/ref_chosen": -7.125, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -48.68443298339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9990122318267822, + "rewards_train/margins": -0.48056888580322266, + "rewards_train/rejected": -1.5184433460235596, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -13.204919815063477, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -13.25460433959961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2267419844865799, + "rewards_train/margins": 0.0049684494733810425, + "rewards_train/rejected": -0.23171043395996094, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -2.7917580604553223, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -6.990612983703613, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.015113306231796741, + "rewards_train/margins": 0.2433229861781001, + "rewards_train/rejected": -0.25843629240989685, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -68.01441192626953, + "logps_train/ref_chosen": -58.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -78.2654037475586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.95144122838974, + "rewards_train/margins": 0.22509914636611938, + "rewards_train/rejected": -1.1765403747558594, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -225.1273193359375, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -223.58132934570312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.11273193359375, + "rewards_train/margins": -0.25459909439086914, + "rewards_train/rejected": -7.858132839202881, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -18.073150634765625, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -48.05970001220703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4323151111602783, + "rewards_train/margins": 1.2736549377441406, + "rewards_train/rejected": -2.705970048904419, + "step": 2222 + }, + { + "epoch": 0.62, + "logps_train/chosen": -103.07837677001953, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -63.0, + "logps_train/rejected": -87.80169677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.457837700843811, + "rewards_train/margins": 1.0223320722579956, + "rewards_train/rejected": -2.4801697731018066, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -43.11441421508789, + "logps_train/ref_chosen": -31.875, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -7.795223712921143, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.123941421508789, + "rewards_train/margins": -0.7287940382957458, + "rewards_train/rejected": -0.3951473832130432, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -115.11402893066406, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -152.09091186523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3114029169082642, + "rewards_train/margins": 5.447688460350037, + "rewards_train/rejected": -6.759091377258301, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -169.58477783203125, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -47.079490661621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.508477687835693, + "rewards_train/margins": -1.9755284786224365, + "rewards_train/rejected": -2.532949209213257, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -6.979674339294434, + "logps_train/ref_chosen": -3.109375, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -33.07939529418945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3870299458503723, + "rewards_train/margins": 2.436534583568573, + "rewards_train/rejected": -2.8235645294189453, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -7.74754524230957, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -25.645078659057617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4247545301914215, + "rewards_train/margins": 0.8710033595561981, + "rewards_train/rejected": -1.2957578897476196, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -71.59199523925781, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -148.18234252929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5591995120048523, + "rewards_train/margins": 2.459034740924835, + "rewards_train/rejected": -3.0182342529296875, + "step": 2223 + }, + { + "epoch": 0.62, + "logps_train/chosen": -34.885353088378906, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -22.7559814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1635353565216064, + "rewards_train/margins": 0.24331283569335938, + "rewards_train/rejected": -1.4068481922149658, + "step": 2223 + }, + { + "epoch": 0.62, + "learning_rate": 6.590698839345921e-08, + "loss": 0.5503, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -87.00956726074219, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -167.22802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5509567260742188, + "rewards_train/margins": 7.321846008300781, + "rewards_train/rejected": -8.872802734375, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -126.80796813964844, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -114.73783874511719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2307968139648438, + "rewards_train/margins": -0.6070129871368408, + "rewards_train/rejected": -2.623783826828003, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -39.465030670166016, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -50.57218551635742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.884003162384033, + "rewards_train/margins": 0.985715389251709, + "rewards_train/rejected": -3.869718551635742, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -1.482406735420227, + "logps_train/ref_chosen": -1.4453125, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -8.028131484985352, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.003709423588588834, + "rewards_train/margins": 0.26785371894948184, + "rewards_train/rejected": -0.2715631425380707, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -161.29168701171875, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -183.332275390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7791688442230225, + "rewards_train/margins": 1.6540586948394775, + "rewards_train/rejected": -5.4332275390625, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -120.75509643554688, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -122.39700317382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9255096316337585, + "rewards_train/margins": 1.9141908288002014, + "rewards_train/rejected": -2.83970046043396, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -4.501614570617676, + "logps_train/ref_chosen": -3.15625, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -22.804332733154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13453646004199982, + "rewards_train/margins": 1.9412093609571457, + "rewards_train/rejected": -2.0757458209991455, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -95.44585418701172, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -113.26171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9445854425430298, + "rewards_train/margins": 0.33158648014068604, + "rewards_train/rejected": -2.276171922683716, + "step": 2224 + }, + { + "epoch": 0.62, + "logps_train/chosen": -186.09397888183594, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -277.5569763183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.709397792816162, + "rewards_train/margins": 8.346299648284912, + "rewards_train/rejected": -13.055697441101074, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -122.36981201171875, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -177.16790771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.636981248855591, + "rewards_train/margins": 3.379809617996216, + "rewards_train/rejected": -7.016790866851807, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -142.92620849609375, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -141.941650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.392620801925659, + "rewards_train/margins": 1.051544427871704, + "rewards_train/rejected": -4.444165229797363, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -64.24251556396484, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -159.4615936279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9992515444755554, + "rewards_train/margins": 4.996908009052277, + "rewards_train/rejected": -5.996159553527832, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -66.13208770751953, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -42.25, + "logps_train/rejected": -68.2054443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3882088661193848, + "rewards_train/margins": 0.2073357105255127, + "rewards_train/rejected": -2.5955445766448975, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -95.85153198242188, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -148.12696838378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.1101531982421875, + "rewards_train/margins": 3.9525442123413086, + "rewards_train/rejected": -8.062697410583496, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -197.6424560546875, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -271.2514953613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.464245796203613, + "rewards_train/margins": 4.560903549194336, + "rewards_train/rejected": -9.02514934539795, + "step": 2225 + }, + { + "epoch": 0.62, + "logps_train/chosen": -47.300537109375, + "logps_train/ref_chosen": -31.25, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -57.89564514160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6050537824630737, + "rewards_train/margins": 0.63451087474823, + "rewards_train/rejected": -2.2395646572113037, + "step": 2225 + }, + { + "epoch": 0.62, + "learning_rate": 6.496571764843161e-08, + "loss": 0.2691, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -38.08149337768555, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -45.05127716064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5081493854522705, + "rewards_train/margins": 0.9469783306121826, + "rewards_train/rejected": -3.455127716064453, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -15.456876754760742, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -42.89425277709961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9769377112388611, + "rewards_train/margins": 1.1249876618385315, + "rewards_train/rejected": -2.1019253730773926, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -22.202054977416992, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -40.27685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.095205545425415, + "rewards_train/margins": 1.9887299537658691, + "rewards_train/rejected": -3.083935499191284, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -184.357666015625, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -209.88185119628906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.385766506195068, + "rewards_train/margins": -0.49758148193359375, + "rewards_train/rejected": -6.888185024261475, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -66.71415710449219, + "logps_train/ref_chosen": -52.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -168.62603759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4214156866073608, + "rewards_train/margins": 5.341188073158264, + "rewards_train/rejected": -6.762603759765625, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -179.50692749023438, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -129.41249084472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1506927013397217, + "rewards_train/margins": 0.09055638313293457, + "rewards_train/rejected": -3.2412490844726562, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -55.60626220703125, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -102.00442504882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2106263637542725, + "rewards_train/margins": 1.864816427230835, + "rewards_train/rejected": -4.075442790985107, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -283.4300537109375, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -274.4958801269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -13.14300537109375, + "rewards_train/margins": -0.09341716766357422, + "rewards_train/rejected": -13.049588203430176, + "step": 2226 + }, + { + "epoch": 0.62, + "logps_train/chosen": -0.5169277191162109, + "logps_train/ref_chosen": -0.45703125, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -3.8699076175689697, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.005989647004753351, + "rewards_train/margins": 0.14818861475214362, + "rewards_train/rejected": -0.15417826175689697, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -44.20854949951172, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -72.67588806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44585496187210083, + "rewards_train/margins": 3.0717338919639587, + "rewards_train/rejected": -3.5175888538360596, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -125.33758544921875, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -274.4363098144531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.633758544921875, + "rewards_train/margins": 7.109872817993164, + "rewards_train/rejected": -9.743631362915039, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -109.27315521240234, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -101.8438949584961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.477315664291382, + "rewards_train/margins": -0.8429261445999146, + "rewards_train/rejected": -1.6343895196914673, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -24.747295379638672, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -70.48162841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8809795379638672, + "rewards_train/margins": 2.617183208465576, + "rewards_train/rejected": -4.498162746429443, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -97.66358184814453, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -150.13938903808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.416358232498169, + "rewards_train/margins": 3.0475809574127197, + "rewards_train/rejected": -4.463939189910889, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -3.9787638187408447, + "logps_train/ref_chosen": -1.421875, + "logps_train/ref_rejected": -1.0546875, + "logps_train/rejected": -5.108846664428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25568887591362, + "rewards_train/margins": 0.14972704648971558, + "rewards_train/rejected": -0.40541592240333557, + "step": 2227 + }, + { + "epoch": 0.62, + "logps_train/chosen": -85.73643493652344, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -122.09584045410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7236435413360596, + "rewards_train/margins": 3.3359405994415283, + "rewards_train/rejected": -5.059584140777588, + "step": 2227 + }, + { + "epoch": 0.62, + "learning_rate": 6.403099115201893e-08, + "loss": 0.3682, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -41.67494583129883, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -78.63587951660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.198744535446167, + "rewards_train/margins": 2.277343511581421, + "rewards_train/rejected": -5.476088047027588, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -12.4501953125, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -34.10268783569336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9043945670127869, + "rewards_train/margins": 2.1996243596076965, + "rewards_train/rejected": -3.1040189266204834, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -147.99114990234375, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -76.82540130615234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.749114990234375, + "rewards_train/margins": -4.266574859619141, + "rewards_train/rejected": -1.4825401306152344, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -52.28215026855469, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -59.093910217285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9907150268554688, + "rewards_train/margins": 0.8624262809753418, + "rewards_train/rejected": -4.8531413078308105, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -45.78839111328125, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -127.58992004394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.778839111328125, + "rewards_train/margins": 4.280152797698975, + "rewards_train/rejected": -5.0589919090271, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -142.85446166992188, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -214.4593505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.135446310043335, + "rewards_train/margins": 5.810488939285278, + "rewards_train/rejected": -8.945935249328613, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -27.238929748535156, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -23.144412994384766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4301429986953735, + "rewards_train/margins": -1.403201699256897, + "rewards_train/rejected": -0.026941299438476562, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -35.021297454833984, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -47.58806228637695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9552547931671143, + "rewards_train/margins": 0.8785514831542969, + "rewards_train/rejected": -3.833806276321411, + "step": 2228 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.201173782348633, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -14.48571491241455, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.004882622044533491, + "rewards_train/margins": 1.1425166609697044, + "rewards_train/rejected": -1.137634038925171, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -13.025862693786621, + "logps_train/ref_chosen": -3.546875, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -43.484920501708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.947898805141449, + "rewards_train/margins": 1.8568432927131653, + "rewards_train/rejected": -2.8047420978546143, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -70.26667785644531, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -88.81086730957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5266677737236023, + "rewards_train/margins": 0.4044189453125, + "rewards_train/rejected": -0.9310867190361023, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -147.51983642578125, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -209.0, + "logps_train/rejected": -276.84130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4019837379455566, + "rewards_train/margins": 3.3821473121643066, + "rewards_train/rejected": -6.784131050109863, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -32.48264694213867, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -26.396228790283203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6857646703720093, + "rewards_train/margins": -0.43364179134368896, + "rewards_train/rejected": -1.2521228790283203, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -215.36099243164062, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -177.2440185546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.2360992431640625, + "rewards_train/margins": -2.7116971015930176, + "rewards_train/rejected": -4.524402141571045, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -70.78746032714844, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -7.90625, + "logps_train/rejected": -37.418880462646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2787461280822754, + "rewards_train/margins": 0.6725170612335205, + "rewards_train/rejected": -2.951263189315796, + "step": 2229 + }, + { + "epoch": 0.62, + "logps_train/chosen": -57.79160690307617, + "logps_train/ref_chosen": -47.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -222.4431915283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0791606903076172, + "rewards_train/margins": 9.865159034729004, + "rewards_train/rejected": -10.944319725036621, + "step": 2229 + }, + { + "epoch": 0.62, + "learning_rate": 6.310281544631546e-08, + "loss": 0.7446, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -55.65657043457031, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -3.703125, + "logps_train/rejected": -20.193443298339844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0156571865081787, + "rewards_train/margins": -0.3666253089904785, + "rewards_train/rejected": -1.6490318775177002, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -144.22557067871094, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -119.26809692382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6725571155548096, + "rewards_train/margins": 2.104252576828003, + "rewards_train/rejected": -4.7768096923828125, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -33.69404602050781, + "logps_train/ref_chosen": -12.5, + "logps_train/ref_rejected": -18.0, + "logps_train/rejected": -51.095767974853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1194045543670654, + "rewards_train/margins": 1.1901721954345703, + "rewards_train/rejected": -3.3095767498016357, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -140.3665771484375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -149.0680694580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.38665771484375, + "rewards_train/margins": 1.2701492309570312, + "rewards_train/rejected": -2.6568069458007812, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -34.72462463378906, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -46.564788818359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.87871253490448, + "rewards_train/margins": -1.0972336530685425, + "rewards_train/rejected": -0.7814788818359375, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -348.35565185546875, + "logps_train/ref_chosen": -254.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -255.71607971191406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.435564994812012, + "rewards_train/margins": -1.4639568328857422, + "rewards_train/rejected": -7.9716081619262695, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -27.670818328857422, + "logps_train/ref_chosen": -7.84375, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -33.016136169433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.982706904411316, + "rewards_train/margins": 0.47515666484832764, + "rewards_train/rejected": -2.4578635692596436, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -70.53691101074219, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -79.48808288574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1286910772323608, + "rewards_train/margins": 0.17011725902557373, + "rewards_train/rejected": -1.2988083362579346, + "step": 2230 + }, + { + "epoch": 0.62, + "logps_train/chosen": -0.7613863348960876, + "logps_train/ref_chosen": -0.09912109375, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -7.629241466522217, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.066226527094841, + "rewards_train/margins": 0.16232262551784515, + "rewards_train/rejected": -0.22854915261268616, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -123.51256561279297, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -166.67623901367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8512565493583679, + "rewards_train/margins": 2.816367447376251, + "rewards_train/rejected": -3.667623996734619, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -20.945430755615234, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -34.11604309082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3789180517196655, + "rewards_train/margins": 1.4701863527297974, + "rewards_train/rejected": -2.849104404449463, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -96.96080017089844, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -178.51336669921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.496080160140991, + "rewards_train/margins": 6.405256509780884, + "rewards_train/rejected": -8.901336669921875, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -161.94381713867188, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -157.3055877685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.344381809234619, + "rewards_train/margins": 2.6861772537231445, + "rewards_train/rejected": -7.030559062957764, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -208.21743774414062, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -244.96229553222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.371744155883789, + "rewards_train/margins": -0.7755146026611328, + "rewards_train/rejected": -9.596229553222656, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -29.904739379882812, + "logps_train/ref_chosen": -11.375, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -29.776256561279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8529739379882812, + "rewards_train/margins": 0.6871516704559326, + "rewards_train/rejected": -2.540125608444214, + "step": 2231 + }, + { + "epoch": 0.62, + "logps_train/chosen": -193.47705078125, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -233.85238647460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.647705078125, + "rewards_train/margins": 6.437533378601074, + "rewards_train/rejected": -8.085238456726074, + "step": 2231 + }, + { + "epoch": 0.62, + "learning_rate": 6.218119702756708e-08, + "loss": 0.5115, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -0.22954688966274261, + "logps_train/ref_chosen": -1.1875, + "logps_train/ref_rejected": -1.1875, + "logps_train/rejected": -0.22210156917572021, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.09579531103372574, + "rewards_train/margins": -0.0007445365190505981, + "rewards_train/rejected": 0.09653984755277634, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -22.257848739624023, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -68.507568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.024215126410126686, + "rewards_train/margins": 3.274972105398774, + "rewards_train/rejected": -3.2507569789886475, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -43.72899627685547, + "logps_train/ref_chosen": -16.5, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -37.02950668334961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7228996753692627, + "rewards_train/margins": -0.3386988639831543, + "rewards_train/rejected": -2.3842008113861084, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -18.553600311279297, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -24.810688018798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7491100430488586, + "rewards_train/margins": 1.191333830356598, + "rewards_train/rejected": -1.9404438734054565, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -9.916668891906738, + "logps_train/ref_chosen": -2.109375, + "logps_train/ref_rejected": -7.5625, + "logps_train/rejected": -24.709918975830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7807294130325317, + "rewards_train/margins": 0.9340125322341919, + "rewards_train/rejected": -1.7147419452667236, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -146.90321350097656, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -173.86077880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3903214931488037, + "rewards_train/margins": 1.395756483078003, + "rewards_train/rejected": -4.786077976226807, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -50.50270080566406, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -101.27680969238281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1752700805664062, + "rewards_train/margins": 1.1024110317230225, + "rewards_train/rejected": -3.2776811122894287, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -92.74845886230469, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -126.88919830322266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9748458862304688, + "rewards_train/margins": 2.664073944091797, + "rewards_train/rejected": -3.6389198303222656, + "step": 2232 + }, + { + "epoch": 0.62, + "logps_train/chosen": -87.41558074951172, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -173.88937377929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4915580749511719, + "rewards_train/margins": 4.747379302978516, + "rewards_train/rejected": -6.2389373779296875, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -31.583518981933594, + "logps_train/ref_chosen": -16.0, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -58.591400146484375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5583518743515015, + "rewards_train/margins": -1.3742118626832962, + "rewards_train/rejected": -0.18414001166820526, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -182.89320373535156, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -141.46533203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9893205165863037, + "rewards_train/margins": -2.74278724193573, + "rewards_train/rejected": -1.2465332746505737, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -131.06024169921875, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -191.0235595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.106024265289307, + "rewards_train/margins": 2.146331787109375, + "rewards_train/rejected": -7.252356052398682, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -12.297689437866211, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -37.06837463378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22976894676685333, + "rewards_train/margins": 1.2270685881376266, + "rewards_train/rejected": -1.45683753490448, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -171.00717163085938, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -279.2506103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.450717449188232, + "rewards_train/margins": 10.274343967437744, + "rewards_train/rejected": -14.725061416625977, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -78.09575653076172, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -131.6872100830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1595757007598877, + "rewards_train/margins": 0.7591453790664673, + "rewards_train/rejected": -1.918721079826355, + "step": 2233 + }, + { + "epoch": 0.62, + "logps_train/chosen": -32.986412048339844, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -57.16582489013672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.473641276359558, + "rewards_train/margins": 2.1429413557052612, + "rewards_train/rejected": -3.6165826320648193, + "step": 2233 + }, + { + "epoch": 0.62, + "learning_rate": 6.126614234612593e-08, + "loss": 0.5035, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -109.26615905761719, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -178.4740447998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7266159057617188, + "rewards_train/margins": 3.7207884788513184, + "rewards_train/rejected": -6.447404384613037, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -30.213655471801758, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -29.380252838134766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0963655710220337, + "rewards_train/margins": 0.6229097843170166, + "rewards_train/rejected": -1.7192753553390503, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -135.41195678710938, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -226.25677490234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0911957025527954, + "rewards_train/margins": 9.13448178768158, + "rewards_train/rejected": -10.225677490234375, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -255.71011352539062, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -259.0628967285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.9710111618042, + "rewards_train/margins": 1.4352788925170898, + "rewards_train/rejected": -11.406290054321289, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -89.15966033935547, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -202.75375366210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7659660577774048, + "rewards_train/margins": 10.10940968990326, + "rewards_train/rejected": -11.875375747680664, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -49.41401672363281, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -5.1875, + "logps_train/rejected": -51.486610412597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11640167236328125, + "rewards_train/margins": 4.513509273529053, + "rewards_train/rejected": -4.629910945892334, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -189.40725708007812, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -213.29458618164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.340725898742676, + "rewards_train/margins": 1.5887327194213867, + "rewards_train/rejected": -7.9294586181640625, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -72.49634552001953, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -85.14936828613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.59963458776474, + "rewards_train/margins": 0.5153022408485413, + "rewards_train/rejected": -1.1149368286132812, + "step": 2234 + }, + { + "epoch": 0.62, + "logps_train/chosen": -58.96855926513672, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -65.47261810302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5218560695648193, + "rewards_train/margins": 0.42540574073791504, + "rewards_train/rejected": -2.9472618103027344, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -94.2745132446289, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -103.68228912353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7274513244628906, + "rewards_train/margins": 0.44077765941619873, + "rewards_train/rejected": -1.1682289838790894, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -137.7360382080078, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -211.01817321777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9736039638519287, + "rewards_train/margins": 0.928213357925415, + "rewards_train/rejected": -3.9018173217773438, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -135.35202026367188, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -210.91796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.635202169418335, + "rewards_train/margins": 5.656594514846802, + "rewards_train/rejected": -8.291796684265137, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -146.51255798339844, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -195.81695556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.001255750656128, + "rewards_train/margins": 3.9804399013519287, + "rewards_train/rejected": -5.981695652008057, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -179.09783935546875, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -180.2685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.209784030914307, + "rewards_train/margins": 0.11707162857055664, + "rewards_train/rejected": -4.326855659484863, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -91.15377044677734, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -141.61329650878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7653770446777344, + "rewards_train/margins": 2.495952606201172, + "rewards_train/rejected": -4.261329650878906, + "step": 2235 + }, + { + "epoch": 0.62, + "logps_train/chosen": -200.6502685546875, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -104.64302062988281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.96502685546875, + "rewards_train/margins": -6.100724697113037, + "rewards_train/rejected": -2.864302158355713, + "step": 2235 + }, + { + "epoch": 0.62, + "learning_rate": 6.035765780640422e-08, + "loss": 0.5941, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -264.4739990234375, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -178.0, + "logps_train/rejected": -277.65997314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.04740047454834, + "rewards_train/margins": 0.9185972213745117, + "rewards_train/rejected": -9.965997695922852, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -8.462320327758789, + "logps_train/ref_chosen": -1.1171875, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -28.06882667541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7345132827758789, + "rewards_train/margins": 0.4723694324493408, + "rewards_train/rejected": -1.2068827152252197, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -81.10289001464844, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -131.222900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3102890253067017, + "rewards_train/margins": 2.1620010137557983, + "rewards_train/rejected": -3.4722900390625, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -24.657485961914062, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -13.4375, + "logps_train/rejected": -29.75042152404785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.303248643875122, + "rewards_train/margins": 0.3280435800552368, + "rewards_train/rejected": -1.6312922239303589, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -85.514404296875, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -130.78713989257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.651440382003784, + "rewards_train/margins": 3.6772735118865967, + "rewards_train/rejected": -6.328713893890381, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -317.652587890625, + "logps_train/ref_chosen": -201.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -229.4857177734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.66525936126709, + "rewards_train/margins": -2.5166873931884766, + "rewards_train/rejected": -9.148571968078613, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -139.74693298339844, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -200.5478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8746932744979858, + "rewards_train/margins": 2.0800918340682983, + "rewards_train/rejected": -3.954785108566284, + "step": 2236 + }, + { + "epoch": 0.62, + "logps_train/chosen": -236.7922821044922, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -200.54214477539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.879228591918945, + "rewards_train/margins": -1.2250137329101562, + "rewards_train/rejected": -8.654214859008789, + "step": 2236 + }, + { + "epoch": 0.63, + "logps_train/chosen": -27.654077529907227, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -52.50844192504883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4716577529907227, + "rewards_train/margins": 2.0666863918304443, + "rewards_train/rejected": -3.538344144821167, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -14.462162971496582, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -17.764787673950195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8712162971496582, + "rewards_train/margins": 0.19276249408721924, + "rewards_train/rejected": -1.0639787912368774, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -133.1165008544922, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -126.93680572509766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.861650228500366, + "rewards_train/margins": -0.9679696559906006, + "rewards_train/rejected": -2.8936805725097656, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -115.13787841796875, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -172.06405639648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.813787817955017, + "rewards_train/margins": 4.89261782169342, + "rewards_train/rejected": -6.7064056396484375, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -5.1749701499938965, + "logps_train/ref_chosen": -4.75, + "logps_train/ref_rejected": -5.59375, + "logps_train/rejected": -30.190105438232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.04249701648950577, + "rewards_train/margins": 2.4171384796500206, + "rewards_train/rejected": -2.4596354961395264, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.182586669921875, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -29.248859405517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0192413330078125, + "rewards_train/margins": 2.350377321243286, + "rewards_train/rejected": -2.3311359882354736, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -80.93596649169922, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -215.68240356445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8685966730117798, + "rewards_train/margins": 7.849644064903259, + "rewards_train/rejected": -9.718240737915039, + "step": 2237 + }, + { + "epoch": 0.63, + "logps_train/chosen": -48.80158615112305, + "logps_train/ref_chosen": -28.125, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -93.62523651123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0676586627960205, + "rewards_train/margins": 3.2948648929595947, + "rewards_train/rejected": -5.362523555755615, + "step": 2237 + }, + { + "epoch": 0.63, + "learning_rate": 5.945574976683077e-08, + "loss": 0.4952, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -160.02330017089844, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -177.26516723632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5523300170898438, + "rewards_train/margins": 2.3741869926452637, + "rewards_train/rejected": -5.926517009735107, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -3.427691698074341, + "logps_train/ref_chosen": -0.6796875, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -11.789153099060059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2748004198074341, + "rewards_train/margins": 0.4947398900985718, + "rewards_train/rejected": -0.7695403099060059, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -230.21348571777344, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -142.81082153320312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.171348571777344, + "rewards_train/margins": -5.890266418457031, + "rewards_train/rejected": -5.2810821533203125, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -33.298072814941406, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -1.921875, + "logps_train/rejected": -20.31891441345215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.367307424545288, + "rewards_train/margins": -0.5276035070419312, + "rewards_train/rejected": -1.839703917503357, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -1.5201667547225952, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -12.1506929397583, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0018895745743066072, + "rewards_train/margins": 0.3794588625896722, + "rewards_train/rejected": -0.3775692880153656, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.50804328918457, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -1.7890625, + "logps_train/rejected": -10.21884536743164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.760179340839386, + "rewards_train/margins": 0.08279895782470703, + "rewards_train/rejected": -0.842978298664093, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -128.33425903320312, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -222.00888061523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4334259033203125, + "rewards_train/margins": 9.417462348937988, + "rewards_train/rejected": -9.8508882522583, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -100.67277526855469, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -122.70823669433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8672775030136108, + "rewards_train/margins": 0.4535461664199829, + "rewards_train/rejected": -2.3208236694335938, + "step": 2238 + }, + { + "epoch": 0.63, + "logps_train/chosen": -80.90031433105469, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -115.67659759521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8900314569473267, + "rewards_train/margins": 1.0276283025741577, + "rewards_train/rejected": -1.9176597595214844, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -172.67514038085938, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -188.04092407226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.2675142288208, + "rewards_train/margins": 0.8865785598754883, + "rewards_train/rejected": -9.154092788696289, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -107.80851745605469, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -136.9745635986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13085174560546875, + "rewards_train/margins": 3.66660475730896, + "rewards_train/rejected": -3.7974565029144287, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -169.8474884033203, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -231.78030395507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7847487926483154, + "rewards_train/margins": 7.04328179359436, + "rewards_train/rejected": -10.828030586242676, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -10.129913330078125, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -8.520654678344727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7598663568496704, + "rewards_train/margins": -0.26248839497566223, + "rewards_train/rejected": -0.4973779618740082, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -30.65779685974121, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -59.59477615356445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.222029685974121, + "rewards_train/margins": 0.7624480724334717, + "rewards_train/rejected": -2.9844777584075928, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -212.63140869140625, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -180.08160400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.563140869140625, + "rewards_train/margins": 0.3450198173522949, + "rewards_train/rejected": -7.90816068649292, + "step": 2239 + }, + { + "epoch": 0.63, + "logps_train/chosen": -31.133262634277344, + "logps_train/ref_chosen": -16.125, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -24.05314826965332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5008262395858765, + "rewards_train/margins": 0.4857386350631714, + "rewards_train/rejected": -1.9865648746490479, + "step": 2239 + }, + { + "epoch": 0.63, + "learning_rate": 5.8560424539805255e-08, + "loss": 0.7514, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -50.215789794921875, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -50.00080108642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7965790033340454, + "rewards_train/margins": 1.3410011529922485, + "rewards_train/rejected": -3.137580156326294, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -8.468338012695312, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -35.44816970825195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6077713370323181, + "rewards_train/margins": -0.012954354286193848, + "rewards_train/rejected": -0.5948169827461243, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -125.22064971923828, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -219.19912719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.172065019607544, + "rewards_train/margins": 6.447848081588745, + "rewards_train/rejected": -8.619913101196289, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -37.47926330566406, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -67.9629898071289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.329176425933838, + "rewards_train/margins": 2.2421226501464844, + "rewards_train/rejected": -4.571299076080322, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.196901321411133, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -62.62800979614258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7384401559829712, + "rewards_train/margins": 3.58686101436615, + "rewards_train/rejected": -4.325301170349121, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -55.86310958862305, + "logps_train/ref_chosen": -27.625, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -58.19329071044922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8238110542297363, + "rewards_train/margins": -0.4044818878173828, + "rewards_train/rejected": -2.4193291664123535, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -96.81776428222656, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -190.68310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2817764282226562, + "rewards_train/margins": 6.136534690856934, + "rewards_train/rejected": -8.41831111907959, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -163.71090698242188, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -209.8941650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.221090793609619, + "rewards_train/margins": 1.2683258056640625, + "rewards_train/rejected": -7.489416599273682, + "step": 2240 + }, + { + "epoch": 0.63, + "logps_train/chosen": -152.46121215820312, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -269.0459899902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.54612135887146, + "rewards_train/margins": 7.258477449417114, + "rewards_train/rejected": -10.804598808288574, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -68.04327392578125, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -121.3001937866211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.979327380657196, + "rewards_train/margins": 3.7506921887397766, + "rewards_train/rejected": -4.730019569396973, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -159.76556396484375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -218.14529418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9765565395355225, + "rewards_train/margins": 5.23797345161438, + "rewards_train/rejected": -8.214529991149902, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -17.96398162841797, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -10.72874641418457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2807731628417969, + "rewards_train/margins": -0.6453984975814819, + "rewards_train/rejected": -0.6353746652603149, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -231.1333465576172, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -231.9011993408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.01333475112915, + "rewards_train/margins": -0.8232145309448242, + "rewards_train/rejected": -4.190120220184326, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -245.65646362304688, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -220.82229614257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.865646362304688, + "rewards_train/margins": 1.5665836334228516, + "rewards_train/rejected": -10.432229995727539, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -104.15188598632812, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -162.1735076904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6151885986328125, + "rewards_train/margins": 4.652162075042725, + "rewards_train/rejected": -6.267350673675537, + "step": 2241 + }, + { + "epoch": 0.63, + "logps_train/chosen": -111.9879150390625, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -194.03341674804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.798791527748108, + "rewards_train/margins": 7.804550528526306, + "rewards_train/rejected": -9.603342056274414, + "step": 2241 + }, + { + "epoch": 0.63, + "learning_rate": 5.767168839165537e-08, + "loss": 0.2945, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -114.08326721191406, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -239.5335693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2083266973495483, + "rewards_train/margins": 10.095030426979065, + "rewards_train/rejected": -11.303357124328613, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -3.7695484161376953, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -3.740837574005127, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1738298386335373, + "rewards_train/margins": -0.23099608346819878, + "rewards_train/rejected": 0.057166244834661484, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -29.780147552490234, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -40.61232376098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.171764850616455, + "rewards_train/margins": 1.1457176208496094, + "rewards_train/rejected": -3.3174824714660645, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -34.5312614440918, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -41.03955841064453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9375011920928955, + "rewards_train/margins": 0.5195796489715576, + "rewards_train/rejected": -3.457080841064453, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -152.3216094970703, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -188.50633239746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.782160997390747, + "rewards_train/margins": 2.8684723377227783, + "rewards_train/rejected": -6.650633335113525, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -16.32833480834961, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -33.00695037841797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41408348083496094, + "rewards_train/margins": 0.7866115570068359, + "rewards_train/rejected": -1.2006950378417969, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -83.92767333984375, + "logps_train/ref_chosen": -60.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -120.84857177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3927674293518066, + "rewards_train/margins": 2.6920900344848633, + "rewards_train/rejected": -5.08485746383667, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -11.113533973693848, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -34.53550338745117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5019783973693848, + "rewards_train/margins": 1.2640719413757324, + "rewards_train/rejected": -1.7660503387451172, + "step": 2242 + }, + { + "epoch": 0.63, + "logps_train/chosen": -30.109535217285156, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -19.146209716796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3484535217285156, + "rewards_train/margins": -0.20570755004882812, + "rewards_train/rejected": -1.1427459716796875, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -39.047969818115234, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -52.3575325012207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0702030211687088, + "rewards_train/margins": 3.2809562236070633, + "rewards_train/rejected": -3.2107532024383545, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -23.855953216552734, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -56.24292755126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1152827739715576, + "rewards_train/margins": 2.1402599811553955, + "rewards_train/rejected": -4.255542755126953, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -52.80437469482422, + "logps_train/ref_chosen": -30.125, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -81.56755828857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.267937421798706, + "rewards_train/margins": 2.413818597793579, + "rewards_train/rejected": -4.681756019592285, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -10.767129898071289, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -14.715946197509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3735879957675934, + "rewards_train/margins": 0.4605066478252411, + "rewards_train/rejected": -0.8340946435928345, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -24.967008590698242, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -43.87242889404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8342008590698242, + "rewards_train/margins": 1.8780419826507568, + "rewards_train/rejected": -2.712242841720581, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -9.192447662353516, + "logps_train/ref_chosen": -0.54296875, + "logps_train/ref_rejected": -2.578125, + "logps_train/rejected": -19.284204483032227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8649479150772095, + "rewards_train/margins": 0.8056600093841553, + "rewards_train/rejected": -1.6706079244613647, + "step": 2243 + }, + { + "epoch": 0.63, + "logps_train/chosen": -1.020636796951294, + "logps_train/ref_chosen": -0.5546875, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -13.484374046325684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.046594928950071335, + "rewards_train/margins": 0.27684249356389046, + "rewards_train/rejected": -0.3234374225139618, + "step": 2243 + }, + { + "epoch": 0.63, + "learning_rate": 5.678954754259169e-08, + "loss": 0.3065, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -175.73788452148438, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -162.994873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.973788738250732, + "rewards_train/margins": 0.2756986618041992, + "rewards_train/rejected": -6.249487400054932, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -66.7442398071289, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -114.44493103027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6994240283966064, + "rewards_train/margins": 1.9450690746307373, + "rewards_train/rejected": -3.6444931030273438, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.566055297851562, + "logps_train/ref_chosen": -0.08154296875, + "logps_train/ref_rejected": -0.08154296875, + "logps_train/rejected": -20.04427719116211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0484511852264404, + "rewards_train/margins": -0.0521777868270874, + "rewards_train/rejected": -1.996273398399353, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -29.974773406982422, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -67.39324951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1224772930145264, + "rewards_train/margins": 3.6230976581573486, + "rewards_train/rejected": -5.745574951171875, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -73.94210815429688, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -91.2416763305664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.794210910797119, + "rewards_train/margins": -0.6700432300567627, + "rewards_train/rejected": -2.1241676807403564, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -50.30359649658203, + "logps_train/ref_chosen": -38.0, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -54.92129135131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.230359673500061, + "rewards_train/margins": 1.824269413948059, + "rewards_train/rejected": -3.05462908744812, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -138.81883239746094, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -114.82022094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5318832397460938, + "rewards_train/margins": 0.35013890266418457, + "rewards_train/rejected": -3.8820221424102783, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -125.20903015136719, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -221.46751403808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.02090311050415, + "rewards_train/margins": 3.47584867477417, + "rewards_train/rejected": -10.49675178527832, + "step": 2244 + }, + { + "epoch": 0.63, + "logps_train/chosen": -74.78073120117188, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -105.3681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4030730724334717, + "rewards_train/margins": 1.73374342918396, + "rewards_train/rejected": -4.136816501617432, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -6.755348205566406, + "logps_train/ref_chosen": -3.71875, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -30.630565643310547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3036598265171051, + "rewards_train/margins": -0.4031032621860504, + "rewards_train/rejected": 0.09944343566894531, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -144.86892700195312, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -119.88607788085938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4868927001953125, + "rewards_train/margins": -1.0482847690582275, + "rewards_train/rejected": -2.438607931137085, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -1.4328736066818237, + "logps_train/ref_chosen": -1.7265625, + "logps_train/ref_rejected": -0.7265625, + "logps_train/rejected": -0.31501534581184387, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.029368890449404716, + "rewards_train/margins": -0.011785825714468956, + "rewards_train/rejected": 0.04115471616387367, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -26.94544219970703, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -29.856107711791992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3570442199707031, + "rewards_train/margins": 2.012941598892212, + "rewards_train/rejected": -2.369985818862915, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -19.801530838012695, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -17.587303161621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3895281553268433, + "rewards_train/margins": -0.0370478630065918, + "rewards_train/rejected": -1.3524802923202515, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -192.3657989501953, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -218.9866485595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.936580181121826, + "rewards_train/margins": 6.012084484100342, + "rewards_train/rejected": -11.948664665222168, + "step": 2245 + }, + { + "epoch": 0.63, + "logps_train/chosen": -8.377056121826172, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -52.67355728149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.05604438856244087, + "rewards_train/margins": 2.5359001643955708, + "rewards_train/rejected": -2.47985577583313, + "step": 2245 + }, + { + "epoch": 0.63, + "learning_rate": 5.591400816666492e-08, + "loss": 0.455, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -99.38660430908203, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -229.73028564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3886604309082031, + "rewards_train/margins": 7.984368324279785, + "rewards_train/rejected": -8.373028755187988, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -21.55597686767578, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -40.042171478271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7759102582931519, + "rewards_train/margins": 0.859557032585144, + "rewards_train/rejected": -2.635467290878296, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -22.281024932861328, + "logps_train/ref_chosen": -25.75, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -56.991485595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.34689751267433167, + "rewards_train/margins": 3.0710460245609283, + "rewards_train/rejected": -2.7241485118865967, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -126.29444122314453, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -129.8310546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4294441342353821, + "rewards_train/margins": 0.10366135835647583, + "rewards_train/rejected": -0.5331054925918579, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -28.666820526123047, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -33.641353607177734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3979320526123047, + "rewards_train/margins": 0.4974534511566162, + "rewards_train/rejected": -2.895385503768921, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -40.006500244140625, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -40.132781982421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9756500720977783, + "rewards_train/margins": -0.2373718023300171, + "rewards_train/rejected": -1.7382782697677612, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -132.34072875976562, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -155.93505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5840729475021362, + "rewards_train/margins": 3.809433102607727, + "rewards_train/rejected": -5.393506050109863, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -19.99648666381836, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -41.6974983215332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1933987140655518, + "rewards_train/margins": 0.6638511419296265, + "rewards_train/rejected": -1.8572498559951782, + "step": 2246 + }, + { + "epoch": 0.63, + "logps_train/chosen": -26.891386032104492, + "logps_train/ref_chosen": -3.578125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -26.59157943725586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3313262462615967, + "rewards_train/margins": -0.15029335021972656, + "rewards_train/rejected": -2.18103289604187, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -3.3158822059631348, + "logps_train/ref_chosen": -0.41015625, + "logps_train/ref_rejected": -0.41015625, + "logps_train/rejected": -3.3785367012023926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2905726134777069, + "rewards_train/margins": 0.006265431642532349, + "rewards_train/rejected": -0.29683804512023926, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -32.462406158447266, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -2.75, + "logps_train/rejected": -14.944448471069336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.521240711212158, + "rewards_train/margins": -1.3017958402633667, + "rewards_train/rejected": -1.2194448709487915, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -124.39212799072266, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -223.92828369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4392127990722656, + "rewards_train/margins": 4.953615665435791, + "rewards_train/rejected": -7.392828464508057, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -41.72920227050781, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -2.1875, + "logps_train/rejected": -34.03662872314453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9854202270507812, + "rewards_train/margins": 1.1994926929473877, + "rewards_train/rejected": -3.184912919998169, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -117.58798217773438, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -133.48587036132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.058798313140869, + "rewards_train/margins": 1.7397887706756592, + "rewards_train/rejected": -3.7985870838165283, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -27.411649703979492, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -51.277366638183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3380401134490967, + "rewards_train/margins": 1.7771966457366943, + "rewards_train/rejected": -4.115236759185791, + "step": 2247 + }, + { + "epoch": 0.63, + "logps_train/chosen": -9.322891235351562, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -15.875, + "logps_train/rejected": -28.8028564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5760391354560852, + "rewards_train/margins": 0.7167465090751648, + "rewards_train/rejected": -1.29278564453125, + "step": 2247 + }, + { + "epoch": 0.63, + "learning_rate": 5.50450763917224e-08, + "loss": 0.4225, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -105.44813537597656, + "logps_train/ref_chosen": -63.75, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -174.18418884277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.169813632965088, + "rewards_train/margins": 2.598605155944824, + "rewards_train/rejected": -6.768418788909912, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -14.49547004699707, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -19.375, + "logps_train/rejected": -27.334686279296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8214220404624939, + "rewards_train/margins": -0.025453388690948486, + "rewards_train/rejected": -0.7959686517715454, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -19.05682373046875, + "logps_train/ref_chosen": -4.6875, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -45.32894515991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4369324445724487, + "rewards_train/margins": 2.4334620237350464, + "rewards_train/rejected": -3.870394468307495, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.4252254068851471, + "logps_train/ref_chosen": -0.65234375, + "logps_train/ref_rejected": -0.65234375, + "logps_train/rejected": -0.43681788444519043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02271183393895626, + "rewards_train/margins": 0.0011592470109462738, + "rewards_train/rejected": 0.021552586928009987, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -182.50949096679688, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -221.26580810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7509491443634033, + "rewards_train/margins": 5.475631475448608, + "rewards_train/rejected": -9.226580619812012, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -25.939510345458984, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -19.858301162719727, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7564510703086853, + "rewards_train/margins": 0.5668790936470032, + "rewards_train/rejected": -1.3233301639556885, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -4.551595211029053, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -45.732566833496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3379720151424408, + "rewards_train/margins": 2.9852847158908844, + "rewards_train/rejected": -3.323256731033325, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.74550437927246, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -31.783945083618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8479880094528198, + "rewards_train/margins": 0.2991565465927124, + "rewards_train/rejected": -2.1471445560455322, + "step": 2248 + }, + { + "epoch": 0.63, + "logps_train/chosen": -141.31219482421875, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -191.49493408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8312195539474487, + "rewards_train/margins": 4.4182740449905396, + "rewards_train/rejected": -6.249493598937988, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -129.16714477539062, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -168.58082580566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0167144536972046, + "rewards_train/margins": 1.3413680791854858, + "rewards_train/rejected": -2.3580825328826904, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -152.21322631835938, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -160.25729370117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6213226318359375, + "rewards_train/margins": 3.3044068813323975, + "rewards_train/rejected": -3.925729513168335, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -18.903587341308594, + "logps_train/ref_chosen": -20.625, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -56.19583511352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17214126884937286, + "rewards_train/margins": 4.066724732518196, + "rewards_train/rejected": -3.8945834636688232, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -13.450291633605957, + "logps_train/ref_chosen": -4.46875, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -45.41749572753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8981541991233826, + "rewards_train/margins": 1.5685954689979553, + "rewards_train/rejected": -2.466749668121338, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -16.863618850708008, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -25.928895950317383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4097994565963745, + "rewards_train/margins": 0.5737152099609375, + "rewards_train/rejected": -1.983514666557312, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -77.26219177246094, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -3.640625, + "logps_train/rejected": -16.751873016357422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0012192726135254, + "rewards_train/margins": -0.6900944709777832, + "rewards_train/rejected": -1.3111248016357422, + "step": 2249 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.395109176635742, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -33.7181282043457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5520109534263611, + "rewards_train/margins": 0.48230189085006714, + "rewards_train/rejected": -1.0343128442764282, + "step": 2249 + }, + { + "epoch": 0.63, + "learning_rate": 5.4182758299365364e-08, + "loss": 0.3207, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -104.04558563232422, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -163.12289428710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7045585513114929, + "rewards_train/margins": 4.257730782032013, + "rewards_train/rejected": -4.962289333343506, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -94.199462890625, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -171.428955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5699462890625, + "rewards_train/margins": 5.172949314117432, + "rewards_train/rejected": -7.742895603179932, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -34.932071685791016, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -39.991172790527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6182071566581726, + "rewards_train/margins": 1.6684101223945618, + "rewards_train/rejected": -2.2866172790527344, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -102.16511535644531, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -102.01982116699219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8165115714073181, + "rewards_train/margins": -0.01452946662902832, + "rewards_train/rejected": -0.8019821047782898, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -105.60214233398438, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -135.47596740722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9602142572402954, + "rewards_train/margins": 2.037382483482361, + "rewards_train/rejected": -3.9975967407226562, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -10.273197174072266, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -30.743526458740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0835697203874588, + "rewards_train/margins": 2.484532877802849, + "rewards_train/rejected": -2.5681025981903076, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -21.641563415527344, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -34.474342346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2641563415527344, + "rewards_train/margins": 0.9520280361175537, + "rewards_train/rejected": -2.216184377670288, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -26.958942413330078, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -2.4375, + "logps_train/rejected": -7.156008720397949, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0521442890167236, + "rewards_train/margins": -1.5802934169769287, + "rewards_train/rejected": -0.4718508720397949, + "step": 2250 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.6327333450317383, + "logps_train/ref_chosen": -0.43359375, + "logps_train/ref_rejected": -1.0625, + "logps_train/rejected": -0.8371214270591736, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.019913960248231888, + "rewards_train/margins": -0.04245181754231453, + "rewards_train/rejected": 0.02253785729408264, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -144.44769287109375, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -146.46682739257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3447693586349487, + "rewards_train/margins": 4.201913475990295, + "rewards_train/rejected": -5.546682834625244, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -222.70916748046875, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -188.4086151123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.870916843414307, + "rewards_train/margins": 3.1199445724487305, + "rewards_train/rejected": -7.990861415863037, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -57.54308319091797, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -56.219120025634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.904308319091797, + "rewards_train/margins": 0.28947877883911133, + "rewards_train/rejected": -5.193787097930908, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -71.18089294433594, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -146.58184814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9180893301963806, + "rewards_train/margins": 3.590095579624176, + "rewards_train/rejected": -4.508184909820557, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -173.01760864257812, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -221.49267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8017609119415283, + "rewards_train/margins": 6.297507047653198, + "rewards_train/rejected": -10.099267959594727, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -212.61288452148438, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -233.97052001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.661288738250732, + "rewards_train/margins": 3.135763168334961, + "rewards_train/rejected": -7.797051906585693, + "step": 2251 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.2190653681755066, + "logps_train/ref_chosen": -0.13671875, + "logps_train/ref_rejected": -0.13671875, + "logps_train/rejected": -0.19936878979206085, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.008234662003815174, + "rewards_train/margins": -0.001969657838344574, + "rewards_train/rejected": -0.0062650041654706, + "step": 2251 + }, + { + "epoch": 0.63, + "learning_rate": 5.332705992490616e-08, + "loss": 0.3304, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -82.15943145751953, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -28.25, + "logps_train/rejected": -45.39979553222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.615943193435669, + "rewards_train/margins": 0.0990363359451294, + "rewards_train/rejected": -1.7149795293807983, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.2540077269077301, + "logps_train/ref_chosen": -0.01287841796875, + "logps_train/ref_rejected": -0.01287841796875, + "logps_train/rejected": -0.28324010968208313, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.02411293052136898, + "rewards_train/margins": 0.0029232390224933624, + "rewards_train/rejected": -0.027036169543862343, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -30.57773780822754, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -32.96026611328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8452738523483276, + "rewards_train/margins": 0.888252854347229, + "rewards_train/rejected": -2.7335267066955566, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.82558536529541, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -4.633755683898926, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15130853652954102, + "rewards_train/margins": 0.004254534840583801, + "rewards_train/rejected": -0.15556307137012482, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -115.56819152832031, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -162.54312133789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9068191647529602, + "rewards_train/margins": 4.297493159770966, + "rewards_train/rejected": -5.204312324523926, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -40.346981048583984, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -71.59245300292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9221981763839722, + "rewards_train/margins": 4.399547219276428, + "rewards_train/rejected": -6.3217453956604, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -133.78916931152344, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -178.0231475830078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.528916835784912, + "rewards_train/margins": 4.423398494720459, + "rewards_train/rejected": -8.952315330505371, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -192.13555908203125, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -244.0, + "logps_train/rejected": -312.30718994140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5135560035705566, + "rewards_train/margins": 4.317162990570068, + "rewards_train/rejected": -6.830718994140625, + "step": 2252 + }, + { + "epoch": 0.63, + "logps_train/chosen": -37.972042083740234, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -73.57502746582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4284541606903076, + "rewards_train/margins": -0.29595136642456055, + "rewards_train/rejected": -2.132502794265747, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -108.29458618164062, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -102.00343322753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4794585704803467, + "rewards_train/margins": 1.370884895324707, + "rewards_train/rejected": -3.8503434658050537, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -144.63714599609375, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -179.0, + "logps_train/rejected": -231.86846923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.763714551925659, + "rewards_train/margins": 2.523132562637329, + "rewards_train/rejected": -5.286847114562988, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -210.21493530273438, + "logps_train/ref_chosen": -190.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -201.98609924316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.021493673324585, + "rewards_train/margins": 1.977116346359253, + "rewards_train/rejected": -3.998610019683838, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.785268783569336, + "logps_train/ref_chosen": -0.9765625, + "logps_train/ref_rejected": -2.15625, + "logps_train/rejected": -44.61743927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1808706521987915, + "rewards_train/margins": 3.0652483701705933, + "rewards_train/rejected": -4.246119022369385, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -132.9189453125, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -108.31514739990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5918945670127869, + "rewards_train/margins": 1.439620316028595, + "rewards_train/rejected": -2.031514883041382, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.459928512573242, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -3.796875, + "logps_train/rejected": -6.151185989379883, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7678678631782532, + "rewards_train/margins": -0.5324367582798004, + "rewards_train/rejected": -0.23543110489845276, + "step": 2253 + }, + { + "epoch": 0.63, + "logps_train/chosen": -111.97785949707031, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -194.48924255371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3977859020233154, + "rewards_train/margins": 3.50113844871521, + "rewards_train/rejected": -6.898924350738525, + "step": 2253 + }, + { + "epoch": 0.63, + "learning_rate": 5.2477987257325996e-08, + "loss": 0.3116, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -23.467620849609375, + "logps_train/ref_chosen": -15.8125, + "logps_train/ref_rejected": -42.5, + "logps_train/rejected": -87.67388916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7655121088027954, + "rewards_train/margins": 3.751876711845398, + "rewards_train/rejected": -4.517388820648193, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -53.01478576660156, + "logps_train/ref_chosen": -34.75, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -122.772216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8264786005020142, + "rewards_train/margins": 2.300742983818054, + "rewards_train/rejected": -4.127221584320068, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.06405018270015717, + "logps_train/ref_chosen": -0.1513671875, + "logps_train/ref_rejected": -0.1513671875, + "logps_train/rejected": -0.07727882266044617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008731700479984283, + "rewards_train/margins": 0.0013228640891611576, + "rewards_train/rejected": 0.007408836390823126, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -169.3592987060547, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -212.1978302001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.435929775238037, + "rewards_train/margins": 1.1838531494140625, + "rewards_train/rejected": -5.6197829246521, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.10387897491455, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -21.92760467529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.719762921333313, + "rewards_train/margins": 0.1917475461959839, + "rewards_train/rejected": -0.9115104675292969, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -59.355979919433594, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -62.75, + "logps_train/rejected": -77.92422485351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4355979859828949, + "rewards_train/margins": 1.0818245708942413, + "rewards_train/rejected": -1.5174225568771362, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -43.44482421875, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -37.526145935058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18051758408546448, + "rewards_train/margins": 2.108132153749466, + "rewards_train/rejected": -1.9276145696640015, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -108.04417419433594, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -255.52821350097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9544174671173096, + "rewards_train/margins": 13.79840350151062, + "rewards_train/rejected": -16.75282096862793, + "step": 2254 + }, + { + "epoch": 0.63, + "logps_train/chosen": -116.11740112304688, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -48.9119987487793, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.961740255355835, + "rewards_train/margins": 0.3544597625732422, + "rewards_train/rejected": -3.316200017929077, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -42.57035446166992, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -90.71349334716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6070354580879211, + "rewards_train/margins": 2.364314019680023, + "rewards_train/rejected": -2.9713494777679443, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.7551525235176086, + "logps_train/ref_chosen": -1.3515625, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -2.2548673152923584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.059640999883413315, + "rewards_train/margins": 0.0741902319714427, + "rewards_train/rejected": -0.014549232088029385, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -110.37425231933594, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -147.61495971679688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9874253273010254, + "rewards_train/margins": -0.3259294033050537, + "rewards_train/rejected": -3.6614959239959717, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -98.29106140136719, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -73.75608825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0791062116622925, + "rewards_train/margins": 3.921502709388733, + "rewards_train/rejected": -5.000608921051025, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -33.7808952331543, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -27.526859283447266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.034339666366577, + "rewards_train/margins": -1.0472787618637085, + "rewards_train/rejected": -1.9870609045028687, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -107.95806884765625, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -110.24844360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.795806884765625, + "rewards_train/margins": 2.4790375232696533, + "rewards_train/rejected": -3.2748444080352783, + "step": 2255 + }, + { + "epoch": 0.63, + "logps_train/chosen": -29.194534301757812, + "logps_train/ref_chosen": -17.625, + "logps_train/ref_rejected": -22.625, + "logps_train/rejected": -58.281795501708984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1569534540176392, + "rewards_train/margins": 2.4087260961532593, + "rewards_train/rejected": -3.5656795501708984, + "step": 2255 + }, + { + "epoch": 0.63, + "learning_rate": 5.1635546239233386e-08, + "loss": 0.3599, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -11.037311553955078, + "logps_train/ref_chosen": -0.83203125, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -52.65879821777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0205280780792236, + "rewards_train/margins": 1.9453518390655518, + "rewards_train/rejected": -2.9658799171447754, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -100.6187744140625, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -102.45152282714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.61187744140625, + "rewards_train/margins": -0.01672518253326416, + "rewards_train/rejected": -1.5951522588729858, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -89.63738250732422, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -107.45502471923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4637383222579956, + "rewards_train/margins": 1.3317641019821167, + "rewards_train/rejected": -2.7955024242401123, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -172.61373901367188, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -235.77012634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.261373996734619, + "rewards_train/margins": 5.315639019012451, + "rewards_train/rejected": -10.57701301574707, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -41.06522750854492, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -38.75, + "logps_train/rejected": -41.29378890991211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23152275383472443, + "rewards_train/margins": 0.022856131196022034, + "rewards_train/rejected": -0.25437888503074646, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -24.108842849731445, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -2.921875, + "logps_train/rejected": -17.76251792907715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4358842968940735, + "rewards_train/margins": 1.0481800436973572, + "rewards_train/rejected": -1.4840643405914307, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -1.026424527168274, + "logps_train/ref_chosen": -2.0625, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -11.744800567626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10360755026340485, + "rewards_train/margins": 0.05308760702610016, + "rewards_train/rejected": 0.05051994323730469, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -198.68359375, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -232.00106811523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.068359375, + "rewards_train/margins": 1.6317472457885742, + "rewards_train/rejected": -8.700106620788574, + "step": 2256 + }, + { + "epoch": 0.63, + "logps_train/chosen": -76.19979858398438, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -68.1939926147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8199798464775085, + "rewards_train/margins": 2.8744195103645325, + "rewards_train/rejected": -3.694399356842041, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.018325805664062, + "logps_train/ref_chosen": -13.25, + "logps_train/ref_rejected": -2.0625, + "logps_train/rejected": -4.842441082000732, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12316741794347763, + "rewards_train/margins": 0.4011615440249443, + "rewards_train/rejected": -0.2779941260814667, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -6.658145427703857, + "logps_train/ref_chosen": -5.46875, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -20.62161636352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11893954128026962, + "rewards_train/margins": 1.3182221427559853, + "rewards_train/rejected": -1.4371616840362549, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -45.155574798583984, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -54.695220947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4030574560165405, + "rewards_train/margins": 2.341464638710022, + "rewards_train/rejected": -3.7445220947265625, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -75.17701721191406, + "logps_train/ref_chosen": -49.75, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -129.59356689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5427017211914062, + "rewards_train/margins": 4.54165506362915, + "rewards_train/rejected": -7.084356784820557, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -2.1234545707702637, + "logps_train/ref_chosen": -0.96875, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -6.0464091300964355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11547046154737473, + "rewards_train/margins": 0.03917045146226883, + "rewards_train/rejected": -0.15464091300964355, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -2.3898420333862305, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -32.521549224853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.038984205573797226, + "rewards_train/margins": 1.3256706930696964, + "rewards_train/rejected": -1.3646548986434937, + "step": 2257 + }, + { + "epoch": 0.63, + "logps_train/chosen": -190.88214111328125, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -199.3039093017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.688214302062988, + "rewards_train/margins": 3.0421767234802246, + "rewards_train/rejected": -7.730391025543213, + "step": 2257 + }, + { + "epoch": 0.63, + "learning_rate": 5.079974276682253e-08, + "loss": 0.2977, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -49.32215118408203, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -46.66656494140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.969715118408203, + "rewards_train/margins": -0.17805862426757812, + "rewards_train/rejected": -3.791656494140625, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -222.53114318847656, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -244.68804931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.853114604949951, + "rewards_train/margins": 2.2156901359558105, + "rewards_train/rejected": -8.068804740905762, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -25.681337356567383, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -20.358592987060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.655633807182312, + "rewards_train/margins": 0.0989755392074585, + "rewards_train/rejected": -1.7546093463897705, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -50.485107421875, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -41.5, + "logps_train/rejected": -96.42549133300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2735108137130737, + "rewards_train/margins": 4.219038605690002, + "rewards_train/rejected": -5.492549419403076, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -54.501609802246094, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -113.42576599121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5501610040664673, + "rewards_train/margins": 3.292415738105774, + "rewards_train/rejected": -3.842576742172241, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -119.37100219726562, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -176.93679809570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5871002674102783, + "rewards_train/margins": 4.006579637527466, + "rewards_train/rejected": -6.593679904937744, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -21.470935821533203, + "logps_train/ref_chosen": -14.8125, + "logps_train/ref_rejected": -16.875, + "logps_train/rejected": -28.27303695678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6658436059951782, + "rewards_train/margins": 0.47396016120910645, + "rewards_train/rejected": -1.1398037672042847, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -27.86614990234375, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -2.8125, + "logps_train/rejected": -23.111703872680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.036615014076233, + "rewards_train/margins": 0.9933053255081177, + "rewards_train/rejected": -2.0299203395843506, + "step": 2258 + }, + { + "epoch": 0.63, + "logps_train/chosen": -179.0513458251953, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -235.36094665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.5551347732543945, + "rewards_train/margins": 4.480959892272949, + "rewards_train/rejected": -10.036094665527344, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.1410273015499115, + "logps_train/ref_chosen": -0.1748046875, + "logps_train/ref_rejected": -0.1748046875, + "logps_train/rejected": -0.13865788280963898, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.003377738641574979, + "rewards_train/margins": -0.00023694196715950966, + "rewards_train/rejected": 0.0036146806087344885, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -185.2988739013672, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -167.5135498046875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.529887676239014, + "rewards_train/margins": -1.178532600402832, + "rewards_train/rejected": -3.3513550758361816, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -21.776588439941406, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -23.792142868041992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7901588678359985, + "rewards_train/margins": 0.2843679189682007, + "rewards_train/rejected": -2.074526786804199, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.153377532958984, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -6.8125, + "logps_train/rejected": -40.83580780029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8715877532958984, + "rewards_train/margins": 2.53074312210083, + "rewards_train/rejected": -3.4023308753967285, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -116.126220703125, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -156.01730346679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0626220703125, + "rewards_train/margins": 4.7891082763671875, + "rewards_train/rejected": -5.8517303466796875, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -137.21121215820312, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -137.21218872070312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.1788787841796875, + "rewards_train/margins": -0.19990235567092896, + "rewards_train/rejected": 0.37878113985061646, + "step": 2259 + }, + { + "epoch": 0.63, + "logps_train/chosen": -45.593624114990234, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -37.3551025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0593624114990234, + "rewards_train/margins": 2.0448977947235107, + "rewards_train/rejected": -3.104260206222534, + "step": 2259 + }, + { + "epoch": 0.63, + "learning_rate": 4.9970582689831345e-08, + "loss": 0.3824, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -22.184446334838867, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -27.974390029907227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5403196811676025, + "rewards_train/margins": 0.27586936950683594, + "rewards_train/rejected": -1.8161890506744385, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -20.079370498657227, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -50.7347297668457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1204370260238647, + "rewards_train/margins": 1.178036093711853, + "rewards_train/rejected": -2.2984731197357178, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -175.78240966796875, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -283.5306701660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.978240966796875, + "rewards_train/margins": 10.57482624053955, + "rewards_train/rejected": -14.553067207336426, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -433.749755859375, + "logps_train/ref_chosen": -252.0, + "logps_train/ref_rejected": -207.0, + "logps_train/rejected": -372.2095947265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -18.174976348876953, + "rewards_train/margins": -1.6540164947509766, + "rewards_train/rejected": -16.520959854125977, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -182.10830688476562, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -209.93678283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7108306884765625, + "rewards_train/margins": 2.682847499847412, + "rewards_train/rejected": -4.393678188323975, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -10.766660690307617, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -2.03125, + "logps_train/rejected": -10.296656608581543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43604108691215515, + "rewards_train/margins": 0.3904995620250702, + "rewards_train/rejected": -0.8265406489372253, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -13.953024864196777, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -79.13072204589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6046774983406067, + "rewards_train/margins": 3.9333946108818054, + "rewards_train/rejected": -4.538072109222412, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -141.53231811523438, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -173.13137817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.4532318115234375, + "rewards_train/margins": 0.8599061965942383, + "rewards_train/rejected": -6.313138008117676, + "step": 2260 + }, + { + "epoch": 0.63, + "logps_train/chosen": -113.55613708496094, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -180.0150909423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2556138038635254, + "rewards_train/margins": 5.345895290374756, + "rewards_train/rejected": -8.601509094238281, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -28.99072265625, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -41.74291229248047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.017822265625, + "rewards_train/margins": -0.6435309648513794, + "rewards_train/rejected": -1.3742913007736206, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -3.556318759918213, + "logps_train/ref_chosen": -0.490234375, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -11.553502082824707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3066084384918213, + "rewards_train/margins": -0.21375823020935059, + "rewards_train/rejected": -0.0928502082824707, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -62.23783874511719, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -60.5, + "logps_train/rejected": -62.036582946777344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.17378388345241547, + "rewards_train/margins": -0.020125582814216614, + "rewards_train/rejected": -0.15365830063819885, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -40.04121017456055, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -20.90829849243164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9541209936141968, + "rewards_train/margins": -0.2945411205291748, + "rewards_train/rejected": -1.659579873085022, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -17.195556640625, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -100.93518829345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08205566555261612, + "rewards_train/margins": 5.96146335452795, + "rewards_train/rejected": -6.043519020080566, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -44.18061065673828, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -13.8125, + "logps_train/rejected": -49.54377365112305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.630561113357544, + "rewards_train/margins": 0.9425663948059082, + "rewards_train/rejected": -3.573127508163452, + "step": 2261 + }, + { + "epoch": 0.63, + "logps_train/chosen": -15.161078453063965, + "logps_train/ref_chosen": -1.78125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -38.32714080810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3379828929901123, + "rewards_train/margins": 1.5447311401367188, + "rewards_train/rejected": -2.882714033126831, + "step": 2261 + }, + { + "epoch": 0.63, + "learning_rate": 4.914807181150138e-08, + "loss": 0.4733, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -54.60173797607422, + "logps_train/ref_chosen": -40.5, + "logps_train/ref_rejected": -17.5, + "logps_train/rejected": -41.10211181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.410173773765564, + "rewards_train/margins": 0.9500373601913452, + "rewards_train/rejected": -2.360211133956909, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -9.521342277526855, + "logps_train/ref_chosen": -1.453125, + "logps_train/ref_rejected": -2.78125, + "logps_train/rejected": -33.67095184326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8068217635154724, + "rewards_train/margins": 2.2821484208106995, + "rewards_train/rejected": -3.088970184326172, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -142.64747619628906, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -181.94073486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.964747667312622, + "rewards_train/margins": 5.429325819015503, + "rewards_train/rejected": -9.394073486328125, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -22.230993270874023, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -0.63671875, + "logps_train/rejected": -8.166131973266602, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1293493509292603, + "rewards_train/margins": -0.37640804052352905, + "rewards_train/rejected": -0.7529413104057312, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -23.5305118560791, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -4.59375, + "logps_train/rejected": -29.54499053955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7843011617660522, + "rewards_train/margins": 0.7108229398727417, + "rewards_train/rejected": -2.495124101638794, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -2.059497117996216, + "logps_train/ref_chosen": -0.58984375, + "logps_train/ref_rejected": -3.265625, + "logps_train/rejected": -11.562213897705078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14696533977985382, + "rewards_train/margins": 0.6826935857534409, + "rewards_train/rejected": -0.8296589255332947, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -31.678951263427734, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -37.89720153808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.408520221710205, + "rewards_train/margins": 0.4312000274658203, + "rewards_train/rejected": -2.8397202491760254, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -0.5751467943191528, + "logps_train/ref_chosen": -1.2109375, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -8.94383430480957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06357907503843307, + "rewards_train/margins": 0.13296250998973846, + "rewards_train/rejected": -0.06938343495130539, + "step": 2262 + }, + { + "epoch": 0.63, + "logps_train/chosen": -109.4306640625, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -223.52566528320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.093066453933716, + "rewards_train/margins": 7.25950026512146, + "rewards_train/rejected": -10.352566719055176, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -5.421716690063477, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -16.167505264282227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3601404130458832, + "rewards_train/margins": 0.6691100895404816, + "rewards_train/rejected": -1.0292505025863647, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -55.81805419921875, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -228.55953979492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.881805419921875, + "rewards_train/margins": 11.12414836883545, + "rewards_train/rejected": -13.005953788757324, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -112.13632202148438, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -97.07647705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6636321544647217, + "rewards_train/margins": 1.169015645980835, + "rewards_train/rejected": -3.8326478004455566, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -78.52232360839844, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -28.0, + "logps_train/rejected": -52.57928466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6522323489189148, + "rewards_train/margins": 1.8056960701942444, + "rewards_train/rejected": -2.457928419113159, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -23.79766273498535, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -62.12921905517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2547663450241089, + "rewards_train/margins": 2.7706557512283325, + "rewards_train/rejected": -4.025422096252441, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -86.43408966064453, + "logps_train/ref_chosen": -64.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -112.27021789550781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2434089183807373, + "rewards_train/margins": 0.6836128234863281, + "rewards_train/rejected": -2.9270217418670654, + "step": 2263 + }, + { + "epoch": 0.63, + "logps_train/chosen": -164.1229248046875, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -139.41664123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7122925519943237, + "rewards_train/margins": 2.679371476173401, + "rewards_train/rejected": -4.391664028167725, + "step": 2263 + }, + { + "epoch": 0.63, + "learning_rate": 4.833221588853642e-08, + "loss": 0.2898, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -31.867034912109375, + "logps_train/ref_chosen": -22.75, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -39.942691802978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9117035269737244, + "rewards_train/margins": 2.6294406056404114, + "rewards_train/rejected": -3.5411441326141357, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -143.32884216308594, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -185.90322875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.032884120941162, + "rewards_train/margins": 0.6574387550354004, + "rewards_train/rejected": -5.6903228759765625, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -127.28327941894531, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -194.50970458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.928328037261963, + "rewards_train/margins": 5.122642993927002, + "rewards_train/rejected": -8.050971031188965, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -27.99140167236328, + "logps_train/ref_chosen": -12.0, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -21.56641960144043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5991401672363281, + "rewards_train/margins": 0.16843926906585693, + "rewards_train/rejected": -1.767579436302185, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -32.47212219238281, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -38.19880676269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9503371715545654, + "rewards_train/margins": 0.48673105239868164, + "rewards_train/rejected": -3.437068223953247, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -4.586322784423828, + "logps_train/ref_chosen": -1.4140625, + "logps_train/ref_rejected": -1.28125, + "logps_train/rejected": -3.2498862743377686, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.31722602248191833, + "rewards_train/margins": -0.12036238610744476, + "rewards_train/rejected": -0.19686363637447357, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -184.89967346191406, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -228.35272216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.089967250823975, + "rewards_train/margins": 2.245305061340332, + "rewards_train/rejected": -7.335272312164307, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -163.77606201171875, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -169.92469787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.02760648727417, + "rewards_train/margins": 0.614863395690918, + "rewards_train/rejected": -5.642469882965088, + "step": 2264 + }, + { + "epoch": 0.63, + "logps_train/chosen": -15.743701934814453, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -12.585012435913086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.089995265007019, + "rewards_train/margins": -0.11586898565292358, + "rewards_train/rejected": -0.9741262793540955, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -8.165772438049316, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -17.907522201538086, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5181397795677185, + "rewards_train/margins": -0.7773875594139099, + "rewards_train/rejected": 0.2592477798461914, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -9.71399974822998, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -31.18516731262207, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6120249629020691, + "rewards_train/margins": 1.940866768360138, + "rewards_train/rejected": -2.552891731262207, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -134.69821166992188, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -153.82713317871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7698211669921875, + "rewards_train/margins": 1.6628923416137695, + "rewards_train/rejected": -5.432713508605957, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -135.63385009765625, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -126.58618927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.863384962081909, + "rewards_train/margins": 0.795233964920044, + "rewards_train/rejected": -3.658618927001953, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -170.21005249023438, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -303.29425048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6210052967071533, + "rewards_train/margins": 11.608420133590698, + "rewards_train/rejected": -14.229425430297852, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -95.20166015625, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -132.4742431640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7701661586761475, + "rewards_train/margins": 1.7272584438323975, + "rewards_train/rejected": -5.497424602508545, + "step": 2265 + }, + { + "epoch": 0.63, + "logps_train/chosen": -64.38858795166016, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -166.39602661132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1888587474823, + "rewards_train/margins": 1.1007440090179443, + "rewards_train/rejected": -4.289602756500244, + "step": 2265 + }, + { + "epoch": 0.63, + "learning_rate": 4.7523020631063396e-08, + "loss": 0.3694, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -197.34657287597656, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -176.0, + "logps_train/rejected": -295.5594482421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.134657382965088, + "rewards_train/margins": 6.821287631988525, + "rewards_train/rejected": -11.955945014953613, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -79.70301818847656, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -134.46409606933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7703018188476562, + "rewards_train/margins": 4.7761077880859375, + "rewards_train/rejected": -5.546409606933594, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -16.03374481201172, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -52.016502380371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8408744931221008, + "rewards_train/margins": 3.9170257449150085, + "rewards_train/rejected": -4.757900238037109, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.086508750915527, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -18.86400604248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22740088403224945, + "rewards_train/margins": 0.22774972021579742, + "rewards_train/rejected": -0.4551506042480469, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -4.039289474487305, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -1.453125, + "logps_train/rejected": -4.218562602996826, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18669605255126953, + "rewards_train/margins": 0.4632398188114166, + "rewards_train/rejected": -0.2765437662601471, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -55.0201416015625, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -49.5, + "logps_train/rejected": -55.025875091552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.552014172077179, + "rewards_train/margins": 0.0005733370780944824, + "rewards_train/rejected": -0.5525875091552734, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -35.88031005859375, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -52.108978271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.588030993938446, + "rewards_train/margins": 2.322866976261139, + "rewards_train/rejected": -2.910897970199585, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -110.0693359375, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -170.2989959716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.306933641433716, + "rewards_train/margins": 3.672966241836548, + "rewards_train/rejected": -5.979899883270264, + "step": 2266 + }, + { + "epoch": 0.63, + "logps_train/chosen": -21.610984802246094, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -2.515625, + "logps_train/rejected": -2.5910086631774902, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5923484563827515, + "rewards_train/margins": -1.5848100897856057, + "rewards_train/rejected": -0.007538366597145796, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -10.533404350280762, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -15.0625, + "logps_train/rejected": -30.505062103271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6502154469490051, + "rewards_train/margins": 0.8940407633781433, + "rewards_train/rejected": -1.5442562103271484, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -13.16459846496582, + "logps_train/ref_chosen": -14.75, + "logps_train/ref_rejected": -2.046875, + "logps_train/rejected": -13.859477043151855, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.15854015946388245, + "rewards_train/margins": 1.339800387620926, + "rewards_train/rejected": -1.1812602281570435, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -120.53919982910156, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -65.86841583251953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.503920078277588, + "rewards_train/margins": -0.34207820892333984, + "rewards_train/rejected": -4.161841869354248, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -23.33474349975586, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -25.074501037597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.955349326133728, + "rewards_train/margins": 0.3802257776260376, + "rewards_train/rejected": -2.3355751037597656, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -172.6302032470703, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -175.20895385742188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5130205154418945, + "rewards_train/margins": 2.607874870300293, + "rewards_train/rejected": -7.1208953857421875, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -17.339183807373047, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -53.63874435424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4776683747768402, + "rewards_train/margins": 1.5362061560153961, + "rewards_train/rejected": -2.0138745307922363, + "step": 2267 + }, + { + "epoch": 0.63, + "logps_train/chosen": -150.38690185546875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -156.5921173095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7386902570724487, + "rewards_train/margins": 1.4705215692520142, + "rewards_train/rejected": -3.209211826324463, + "step": 2267 + }, + { + "epoch": 0.63, + "learning_rate": 4.672049170259063e-08, + "loss": 0.3834, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -17.160024642944336, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -30.872802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5902212858200073, + "rewards_train/margins": 0.5845590829849243, + "rewards_train/rejected": -2.1747803688049316, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -99.6153564453125, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -155.65274047851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.211535692214966, + "rewards_train/margins": 3.403738260269165, + "rewards_train/rejected": -6.615273952484131, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -47.54029846191406, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -1.78125, + "logps_train/rejected": -43.370670318603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5915298461914062, + "rewards_train/margins": 0.5674123764038086, + "rewards_train/rejected": -4.158942222595215, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -57.5955810546875, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -187.64463806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.14044189453125, + "rewards_train/margins": 9.20490550994873, + "rewards_train/rejected": -9.06446361541748, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -135.7204132080078, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -242.52621459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.572041392326355, + "rewards_train/margins": 8.480580449104309, + "rewards_train/rejected": -10.052621841430664, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -186.0595703125, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -216.09976196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2059571743011475, + "rewards_train/margins": 4.70401930809021, + "rewards_train/rejected": -7.909976482391357, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -152.8187255859375, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -227.27560424804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.781872749328613, + "rewards_train/margins": 5.645688056945801, + "rewards_train/rejected": -11.427560806274414, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -266.292236328125, + "logps_train/ref_chosen": -198.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -226.99484252929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.8292236328125, + "rewards_train/margins": -1.6297392845153809, + "rewards_train/rejected": -5.199484348297119, + "step": 2268 + }, + { + "epoch": 0.63, + "logps_train/chosen": -76.75920104980469, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -105.86016082763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2259200811386108, + "rewards_train/margins": 0.26009607315063477, + "rewards_train/rejected": -1.4860161542892456, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -9.523744583129883, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -11.215578079223633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12737445533275604, + "rewards_train/margins": 0.6973083764314651, + "rewards_train/rejected": -0.8246828317642212, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -156.69305419921875, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -243.70269775390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4193055629730225, + "rewards_train/margins": 4.350964307785034, + "rewards_train/rejected": -7.770269870758057, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -11.17601490020752, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -69.51066589355469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6019765138626099, + "rewards_train/margins": 4.299090266227722, + "rewards_train/rejected": -4.901066780090332, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -17.137113571166992, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -23.403228759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9074613451957703, + "rewards_train/margins": 0.6797365546226501, + "rewards_train/rejected": -1.5871978998184204, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -117.63606262207031, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -99.70320129394531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.113606333732605, + "rewards_train/margins": -0.04328620433807373, + "rewards_train/rejected": -1.0703201293945312, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -12.640359878540039, + "logps_train/ref_chosen": -7.5625, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -27.780799865722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.507785975933075, + "rewards_train/margins": 1.4859190583229065, + "rewards_train/rejected": -1.9937050342559814, + "step": 2269 + }, + { + "epoch": 0.63, + "logps_train/chosen": -28.093441009521484, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -5.9375, + "logps_train/rejected": -36.683799743652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0093441009521484, + "rewards_train/margins": 2.0652859210968018, + "rewards_train/rejected": -3.07463002204895, + "step": 2269 + }, + { + "epoch": 0.63, + "learning_rate": 4.5924634719970215e-08, + "loss": 0.3247, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -10.874421119689941, + "logps_train/ref_chosen": -9.8125, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -35.54555892944336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10619211196899414, + "rewards_train/margins": 1.8671138286590576, + "rewards_train/rejected": -1.9733059406280518, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -180.31011962890625, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -219.34193420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.081011772155762, + "rewards_train/margins": 2.5531816482543945, + "rewards_train/rejected": -11.634193420410156, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -40.76765060424805, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -74.24530029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5267651081085205, + "rewards_train/margins": 3.5977652072906494, + "rewards_train/rejected": -5.12453031539917, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -131.15943908691406, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -188.97784423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.065943956375122, + "rewards_train/margins": 3.7818405628204346, + "rewards_train/rejected": -6.847784519195557, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -208.38333129882812, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -245.95709228515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.28833293914795, + "rewards_train/margins": 1.3073768615722656, + "rewards_train/rejected": -10.595709800720215, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -37.19556427001953, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -16.625, + "logps_train/rejected": -40.003299713134766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3633065223693848, + "rewards_train/margins": -0.025476455688476562, + "rewards_train/rejected": -2.337830066680908, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -127.63589477539062, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -247.40383911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1635894775390625, + "rewards_train/margins": 7.77679443359375, + "rewards_train/rejected": -8.940383911132812, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -48.81452178955078, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -108.90547180175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5564521551132202, + "rewards_train/margins": 4.984095215797424, + "rewards_train/rejected": -6.5405473709106445, + "step": 2270 + }, + { + "epoch": 0.63, + "logps_train/chosen": -241.12371826171875, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -201.0914764404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.612372398376465, + "rewards_train/margins": 0.4467754364013672, + "rewards_train/rejected": -10.059147834777832, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -37.91215515136719, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -66.75364685058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6099655628204346, + "rewards_train/margins": 2.365399122238159, + "rewards_train/rejected": -4.975364685058594, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -24.99367904663086, + "logps_train/ref_chosen": -5.4375, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -45.70407485961914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.955617904663086, + "rewards_train/margins": 0.5022895336151123, + "rewards_train/rejected": -2.4579074382781982, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -154.1364288330078, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -252.06561279296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2136428356170654, + "rewards_train/margins": 4.892918825149536, + "rewards_train/rejected": -8.106561660766602, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -13.202974319458008, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -62.431907653808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15154743194580078, + "rewards_train/margins": 3.566643476486206, + "rewards_train/rejected": -3.718190908432007, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -25.52126693725586, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -27.54354476928711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31462669372558594, + "rewards_train/margins": 0.8897278308868408, + "rewards_train/rejected": -1.2043545246124268, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -168.03182983398438, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -278.51800537109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6031830310821533, + "rewards_train/margins": 6.948617696762085, + "rewards_train/rejected": -9.551800727844238, + "step": 2271 + }, + { + "epoch": 0.63, + "logps_train/chosen": -11.284914016723633, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -17.889846801757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3941164016723633, + "rewards_train/margins": 0.23861831426620483, + "rewards_train/rejected": -0.6327347159385681, + "step": 2271 + }, + { + "epoch": 0.64, + "learning_rate": 4.513545525335705e-08, + "loss": 0.2025, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -15.87127685546875, + "logps_train/ref_chosen": -2.328125, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -14.767714500427246, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.354315161705017, + "rewards_train/margins": -0.6837937235832214, + "rewards_train/rejected": -0.6705214381217957, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -21.289201736450195, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -55.287967681884766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7867326736450195, + "rewards_train/margins": 2.6295642852783203, + "rewards_train/rejected": -4.41629695892334, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -67.93272399902344, + "logps_train/ref_chosen": -61.25, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -120.71075439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6682724356651306, + "rewards_train/margins": 3.6528030037879944, + "rewards_train/rejected": -4.321075439453125, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -12.368865013122559, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -33.350860595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8587614893913269, + "rewards_train/margins": 1.9106996655464172, + "rewards_train/rejected": -2.769461154937744, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -50.8922233581543, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -98.6884765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8392223715782166, + "rewards_train/margins": 0.6796253323554993, + "rewards_train/rejected": -1.5188477039337158, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -145.90365600585938, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -18.30381202697754, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6403656005859375, + "rewards_train/margins": -2.0787343978881836, + "rewards_train/rejected": -0.5616312026977539, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -19.45857048034668, + "logps_train/ref_chosen": -1.9921875, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -38.99309158325195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.746638298034668, + "rewards_train/margins": 1.5401709079742432, + "rewards_train/rejected": -3.286809206008911, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -128.84397888183594, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -172.07325744628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8343979120254517, + "rewards_train/margins": 3.172927737236023, + "rewards_train/rejected": -5.007325649261475, + "step": 2272 + }, + { + "epoch": 0.64, + "logps_train/chosen": -44.17509841918945, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -42.046653747558594, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7175099849700928, + "rewards_train/margins": -0.0878446102142334, + "rewards_train/rejected": -2.6296653747558594, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -46.73648452758789, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -94.01656341552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.048648476600647, + "rewards_train/margins": 3.9530080556869507, + "rewards_train/rejected": -5.001656532287598, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -103.50907897949219, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -106.29034423828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6509079337120056, + "rewards_train/margins": 0.5281265377998352, + "rewards_train/rejected": -1.1790344715118408, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -36.3500862121582, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -55.39368438720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8350086212158203, + "rewards_train/margins": 2.029359817504883, + "rewards_train/rejected": -3.864368438720703, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.852063179016113, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -36.71270751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07895632088184357, + "rewards_train/margins": 2.2048144787549973, + "rewards_train/rejected": -2.283770799636841, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -23.174819946289062, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -10.402312278747559, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7737320065498352, + "rewards_train/margins": -0.1897507905960083, + "rewards_train/rejected": -0.5839812159538269, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -6.726511001586914, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -1.6484375, + "logps_train/rejected": -10.176328659057617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39921361207962036, + "rewards_train/margins": 0.4535754919052124, + "rewards_train/rejected": -0.8527891039848328, + "step": 2273 + }, + { + "epoch": 0.64, + "logps_train/chosen": -221.43890380859375, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -261.0750732421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.34389066696167, + "rewards_train/margins": 2.66361665725708, + "rewards_train/rejected": -9.00750732421875, + "step": 2273 + }, + { + "epoch": 0.64, + "learning_rate": 4.435295882617074e-08, + "loss": 0.4345, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -102.40054321289062, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -204.06625366210938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9900543689727783, + "rewards_train/margins": 8.616571187973022, + "rewards_train/rejected": -10.6066255569458, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -114.69548797607422, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -150.43267822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1695488691329956, + "rewards_train/margins": 5.323719143867493, + "rewards_train/rejected": -6.493268013000488, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -1.8308649063110352, + "logps_train/ref_chosen": -0.65625, + "logps_train/ref_rejected": -0.65625, + "logps_train/rejected": -1.9326632022857666, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11746149510145187, + "rewards_train/margins": 0.010179825127124786, + "rewards_train/rejected": -0.12764132022857666, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -36.020633697509766, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -44.135414123535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9801883697509766, + "rewards_train/margins": 0.6083531379699707, + "rewards_train/rejected": -3.5885415077209473, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -2.9147207736968994, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -6.668587684631348, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08990957587957382, + "rewards_train/margins": 0.12382420152425766, + "rewards_train/rejected": -0.21373377740383148, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -26.970441818237305, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -27.912755966186523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1720441579818726, + "rewards_train/margins": 0.9942315816879272, + "rewards_train/rejected": -2.1662757396698, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.90333366394043, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -29.110517501831055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7200208902359009, + "rewards_train/margins": 0.6691559553146362, + "rewards_train/rejected": -2.389176845550537, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -6.315218925476074, + "logps_train/ref_chosen": -0.71484375, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -28.033889770507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5600375533103943, + "rewards_train/margins": 0.7058514952659607, + "rewards_train/rejected": -1.265889048576355, + "step": 2274 + }, + { + "epoch": 0.64, + "logps_train/chosen": -43.91948318481445, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -44.38833999633789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9419483542442322, + "rewards_train/margins": 0.046885669231414795, + "rewards_train/rejected": -0.988834023475647, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -21.65811538696289, + "logps_train/ref_chosen": -14.6875, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -41.887516021728516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6970615386962891, + "rewards_train/margins": 1.87919020652771, + "rewards_train/rejected": -2.576251745223999, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -149.68130493164062, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -196.03543090820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8681304454803467, + "rewards_train/margins": 3.985412836074829, + "rewards_train/rejected": -6.853543281555176, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -7.747598648071289, + "logps_train/ref_chosen": -3.203125, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -17.08618927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.45444735884666443, + "rewards_train/margins": 0.6385466158390045, + "rewards_train/rejected": -1.092993974685669, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -146.07150268554688, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -272.6882019042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1071503162384033, + "rewards_train/margins": 12.161670446395874, + "rewards_train/rejected": -13.268820762634277, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -72.40404510498047, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -124.83108520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9404045343399048, + "rewards_train/margins": 4.142703890800476, + "rewards_train/rejected": -6.083108425140381, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -103.83460998535156, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -196.1470947265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11653900146484375, + "rewards_train/margins": 10.631248474121094, + "rewards_train/rejected": -10.51470947265625, + "step": 2275 + }, + { + "epoch": 0.64, + "logps_train/chosen": -12.7125244140625, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -15.0276460647583, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.03375244140625, + "rewards_train/margins": 0.02526223659515381, + "rewards_train/rejected": -1.0590146780014038, + "step": 2275 + }, + { + "epoch": 0.64, + "learning_rate": 4.357715091505654e-08, + "loss": 0.3026, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -18.386564254760742, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -51.90092849731445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.33553147315979, + "rewards_train/margins": 0.5795613527297974, + "rewards_train/rejected": -1.9150928258895874, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -81.79441833496094, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -61.75, + "logps_train/rejected": -76.74150848388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0294418334960938, + "rewards_train/margins": 0.46970903873443604, + "rewards_train/rejected": -1.4991508722305298, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -146.6044464111328, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -173.76133728027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8604446649551392, + "rewards_train/margins": 4.1656893491744995, + "rewards_train/rejected": -5.026134014129639, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -56.03160858154297, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -51.427032470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8531608581542969, + "rewards_train/margins": 2.8395423889160156, + "rewards_train/rejected": -3.6927032470703125, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -172.00909423828125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -259.07928466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.900909423828125, + "rewards_train/margins": 6.00701904296875, + "rewards_train/rejected": -8.907928466796875, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -203.3058319091797, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -192.41539001464844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.330583095550537, + "rewards_train/margins": -0.08904409408569336, + "rewards_train/rejected": -6.241539001464844, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -6.5853705406188965, + "logps_train/ref_chosen": -1.2890625, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -29.978412628173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5296308398246765, + "rewards_train/margins": 2.116648018360138, + "rewards_train/rejected": -2.6462788581848145, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -137.06729125976562, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -157.3068084716797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.2567291259765625, + "rewards_train/margins": 1.6739516258239746, + "rewards_train/rejected": -5.930680751800537, + "step": 2276 + }, + { + "epoch": 0.64, + "logps_train/chosen": -222.4375457763672, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -224.98593139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.94375467300415, + "rewards_train/margins": 1.454838752746582, + "rewards_train/rejected": -6.398593425750732, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -151.1492919921875, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -244.60009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.014929294586182, + "rewards_train/margins": 4.345080852508545, + "rewards_train/rejected": -9.360010147094727, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.400781631469727, + "logps_train/ref_chosen": -10.125, + "logps_train/ref_rejected": -2.71875, + "logps_train/rejected": -22.383403778076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7275781631469727, + "rewards_train/margins": 1.2388871908187866, + "rewards_train/rejected": -1.9664653539657593, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -149.3992919921875, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -204.11228942871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.439929485321045, + "rewards_train/margins": 1.2712993621826172, + "rewards_train/rejected": -6.711228847503662, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -134.27059936523438, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -151.95932006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6770598888397217, + "rewards_train/margins": 0.2188720703125, + "rewards_train/rejected": -2.8959319591522217, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -133.517578125, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -147.7237548828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.351758003234863, + "rewards_train/margins": -0.02938222885131836, + "rewards_train/rejected": -4.322375774383545, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -8.981499671936035, + "logps_train/ref_chosen": -1.7265625, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -38.12783432006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7254937291145325, + "rewards_train/margins": 1.6560396552085876, + "rewards_train/rejected": -2.38153338432312, + "step": 2277 + }, + { + "epoch": 0.64, + "logps_train/chosen": -177.65977478027344, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -176.07005310058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.465977430343628, + "rewards_train/margins": 0.8410279750823975, + "rewards_train/rejected": -4.307005405426025, + "step": 2277 + }, + { + "epoch": 0.64, + "learning_rate": 4.280803694984725e-08, + "loss": 0.2866, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -135.44866943359375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -136.17364501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.894866943359375, + "rewards_train/margins": 0.17249751091003418, + "rewards_train/rejected": -2.067364454269409, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -224.00747680664062, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -262.06634521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.900747776031494, + "rewards_train/margins": 4.805886745452881, + "rewards_train/rejected": -12.706634521484375, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -99.59413146972656, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -115.23994445800781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9094130992889404, + "rewards_train/margins": 1.4645812511444092, + "rewards_train/rejected": -4.37399435043335, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -112.02449035644531, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -201.2392120361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3024489879608154, + "rewards_train/margins": 4.821472406387329, + "rewards_train/rejected": -8.123921394348145, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -173.80682373046875, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -156.11508178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3806824684143066, + "rewards_train/margins": 0.6308257579803467, + "rewards_train/rejected": -3.0115082263946533, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -15.953585624694824, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -11.255634307861328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2719210386276245, + "rewards_train/margins": -0.49792009592056274, + "rewards_train/rejected": -0.7740009427070618, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -58.13025665283203, + "logps_train/ref_chosen": -51.25, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -26.600927352905273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6880256533622742, + "rewards_train/margins": 0.990817129611969, + "rewards_train/rejected": -1.6788427829742432, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -9.709511756896973, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -34.67610168457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6225137114524841, + "rewards_train/margins": 0.13259649276733398, + "rewards_train/rejected": -0.7551102042198181, + "step": 2278 + }, + { + "epoch": 0.64, + "logps_train/chosen": -54.345062255859375, + "logps_train/ref_chosen": -35.75, + "logps_train/ref_rejected": -1.4921875, + "logps_train/rejected": -15.420916557312012, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8595062494277954, + "rewards_train/margins": -0.46663331985473633, + "rewards_train/rejected": -1.392872929573059, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -202.10018920898438, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -217.27906799316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9100189208984375, + "rewards_train/margins": 1.4178881645202637, + "rewards_train/rejected": -5.327907085418701, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -10.702926635742188, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -22.589515686035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5140426754951477, + "rewards_train/margins": 1.188658893108368, + "rewards_train/rejected": -1.7027015686035156, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -103.64451599121094, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -169.0, + "logps_train/rejected": -267.75555419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5144516229629517, + "rewards_train/margins": 8.361104369163513, + "rewards_train/rejected": -9.875555992126465, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -165.0893096923828, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -141.68280029296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.55893087387085, + "rewards_train/margins": -1.0906505584716797, + "rewards_train/rejected": -4.46828031539917, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -24.396509170532227, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -42.55206298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7021509408950806, + "rewards_train/margins": 0.8905553817749023, + "rewards_train/rejected": -1.592706322669983, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -15.886505126953125, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -28.825368881225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0511505603790283, + "rewards_train/margins": 0.8313863277435303, + "rewards_train/rejected": -1.8825368881225586, + "step": 2279 + }, + { + "epoch": 0.64, + "logps_train/chosen": -25.579364776611328, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -9.5625, + "logps_train/rejected": -19.55023193359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8485615253448486, + "rewards_train/margins": -0.8497883081436157, + "rewards_train/rejected": -0.9987732172012329, + "step": 2279 + }, + { + "epoch": 0.64, + "learning_rate": 4.204562231352515e-08, + "loss": 0.4941, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -45.01696014404297, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -106.24889373779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15169601142406464, + "rewards_train/margins": 0.5731933861970901, + "rewards_train/rejected": -0.7248893976211548, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -48.08967590332031, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -57.5, + "logps_train/rejected": -75.21334838867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.258967638015747, + "rewards_train/margins": 0.5123672485351562, + "rewards_train/rejected": -1.7713348865509033, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -33.08427429199219, + "logps_train/ref_chosen": -22.25, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -54.48548889160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0834274291992188, + "rewards_train/margins": 2.8276214599609375, + "rewards_train/rejected": -3.9110488891601562, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -86.65593719482422, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -33.70873260498047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.365593671798706, + "rewards_train/margins": 0.380279541015625, + "rewards_train/rejected": -2.745873212814331, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -104.83367156982422, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -133.56781005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.333367109298706, + "rewards_train/margins": 1.4734139442443848, + "rewards_train/rejected": -3.806781053543091, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -22.25635528564453, + "logps_train/ref_chosen": -15.9375, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -59.120628356933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6318855285644531, + "rewards_train/margins": 3.4551773071289062, + "rewards_train/rejected": -4.087062835693359, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -129.00279235839844, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -103.18153381347656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.250279188156128, + "rewards_train/margins": 0.96787428855896, + "rewards_train/rejected": -3.218153476715088, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.08908462524414, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -46.063385009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22140847146511078, + "rewards_train/margins": 0.45993001759052277, + "rewards_train/rejected": -0.6813384890556335, + "step": 2280 + }, + { + "epoch": 0.64, + "logps_train/chosen": -125.80453491210938, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -172.09017944335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4304535388946533, + "rewards_train/margins": 2.3785643577575684, + "rewards_train/rejected": -3.8090178966522217, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -6.955295562744141, + "logps_train/ref_chosen": -0.59765625, + "logps_train/ref_rejected": -1.5859375, + "logps_train/rejected": -6.619722366333008, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.635763943195343, + "rewards_train/margins": -0.13238543272018433, + "rewards_train/rejected": -0.5033785104751587, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -121.10401153564453, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -187.26303100585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4604012966156006, + "rewards_train/margins": 3.9159018993377686, + "rewards_train/rejected": -7.376303195953369, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -22.999305725097656, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -81.94064331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7561805844306946, + "rewards_train/margins": 3.0878838896751404, + "rewards_train/rejected": -3.844064474105835, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -146.21336364746094, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -178.10971069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.571336269378662, + "rewards_train/margins": 5.089635372161865, + "rewards_train/rejected": -9.660971641540527, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -154.7559814453125, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -290.9642333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.125598430633545, + "rewards_train/margins": 7.770824909210205, + "rewards_train/rejected": -12.89642333984375, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -55.126766204833984, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -56.08981704711914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.51267671585083, + "rewards_train/margins": 1.5213050842285156, + "rewards_train/rejected": -4.033981800079346, + "step": 2281 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.352249145507812, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -38.665462493896484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6414749026298523, + "rewards_train/margins": 1.4875712990760803, + "rewards_train/rejected": -2.1290462017059326, + "step": 2281 + }, + { + "epoch": 0.64, + "learning_rate": 4.128991234218471e-08, + "loss": 0.2416, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -71.53680419921875, + "logps_train/ref_chosen": -42.75, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -48.69780731201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.878680467605591, + "rewards_train/margins": 0.8348503112792969, + "rewards_train/rejected": -3.7135307788848877, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -79.09730529785156, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -119.58759307861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.884730577468872, + "rewards_train/margins": 1.7740287780761719, + "rewards_train/rejected": -3.658759355545044, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -158.82472229003906, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -219.23809814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4824724197387695, + "rewards_train/margins": 3.041337490081787, + "rewards_train/rejected": -7.523809909820557, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -2.3174350261688232, + "logps_train/ref_chosen": -2.25, + "logps_train/ref_rejected": -0.384765625, + "logps_train/rejected": -0.31334564089775085, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.006743502803146839, + "rewards_train/margins": -0.013885501306504011, + "rewards_train/rejected": 0.007141998503357172, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -158.96426391601562, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -180.00502014160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9964264631271362, + "rewards_train/margins": 1.4040755033493042, + "rewards_train/rejected": -3.4005019664764404, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -10.789345741271973, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -3.0, + "logps_train/rejected": -8.044570922851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.33518457412719727, + "rewards_train/margins": 0.1692725419998169, + "rewards_train/rejected": -0.5044571161270142, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -8.756937980651855, + "logps_train/ref_chosen": -2.4375, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -27.852081298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6319438219070435, + "rewards_train/margins": 0.7470142841339111, + "rewards_train/rejected": -1.3789581060409546, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -24.541473388671875, + "logps_train/ref_chosen": -2.875, + "logps_train/ref_rejected": -1.8125, + "logps_train/rejected": -30.534774780273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.166647434234619, + "rewards_train/margins": 0.7055799961090088, + "rewards_train/rejected": -2.872227430343628, + "step": 2282 + }, + { + "epoch": 0.64, + "logps_train/chosen": -135.01950073242188, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -204.70639038085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0019500255584717, + "rewards_train/margins": 4.0686891078948975, + "rewards_train/rejected": -6.070639133453369, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -81.38279724121094, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -82.61663818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011720276437699795, + "rewards_train/margins": 0.1233840947970748, + "rewards_train/rejected": -0.111663818359375, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -52.815269470214844, + "logps_train/ref_chosen": -40.25, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -55.41408920288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2565269470214844, + "rewards_train/margins": 2.4473819732666016, + "rewards_train/rejected": -3.703908920288086, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.874361038208008, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -7.0625, + "logps_train/rejected": -22.524370193481445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5311861038208008, + "rewards_train/margins": 1.0150009393692017, + "rewards_train/rejected": -1.5461870431900024, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -57.21393585205078, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -11.089340209960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1213935613632202, + "rewards_train/margins": -0.743709534406662, + "rewards_train/rejected": -0.3776840269565582, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -164.734375, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -217.43287658691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2734375, + "rewards_train/margins": 4.1698503494262695, + "rewards_train/rejected": -6.4432878494262695, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -78.5584716796875, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -145.621337890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.24415282905101776, + "rewards_train/margins": 5.006286904215813, + "rewards_train/rejected": -4.762134075164795, + "step": 2283 + }, + { + "epoch": 0.64, + "logps_train/chosen": -72.37124633789062, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -118.16896057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.437124639749527, + "rewards_train/margins": 4.3297716081142426, + "rewards_train/rejected": -4.7668962478637695, + "step": 2283 + }, + { + "epoch": 0.64, + "learning_rate": 4.054091232499424e-08, + "loss": 0.318, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -167.28933715820312, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -227.48590087890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.578933715820312, + "rewards_train/margins": -0.5303430557250977, + "rewards_train/rejected": -8.048590660095215, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.92425537109375, + "logps_train/ref_chosen": -1.21875, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -39.9803466796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6705505847930908, + "rewards_train/margins": 1.2274842262268066, + "rewards_train/rejected": -2.8980348110198975, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -162.32699584960938, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -168.14971923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.882699728012085, + "rewards_train/margins": 1.5322721004486084, + "rewards_train/rejected": -5.414971828460693, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -32.411346435546875, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -5.125, + "logps_train/rejected": -20.066072463989258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4911346435546875, + "rewards_train/margins": 0.0029726028442382812, + "rewards_train/rejected": -1.4941072463989258, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -152.64666748046875, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -188.84066772460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.414666652679443, + "rewards_train/margins": 0.8694000244140625, + "rewards_train/rejected": -5.284066677093506, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.25527000427246, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -40.70075988769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9317770004272461, + "rewards_train/margins": 1.3757989406585693, + "rewards_train/rejected": -2.3075759410858154, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -23.935100555419922, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -19.25, + "logps_train/rejected": -42.81123733520508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.16851007938385, + "rewards_train/margins": 1.187613606452942, + "rewards_train/rejected": -2.356123685836792, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -10.326652526855469, + "logps_train/ref_chosen": -0.7265625, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -16.04905891418457, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9600090384483337, + "rewards_train/margins": -0.9301031474024057, + "rewards_train/rejected": -0.029905891045928, + "step": 2284 + }, + { + "epoch": 0.64, + "logps_train/chosen": -0.4020553529262543, + "logps_train/ref_chosen": -0.298828125, + "logps_train/ref_rejected": -2.296875, + "logps_train/rejected": -12.707762718200684, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.010322722606360912, + "rewards_train/margins": 1.0307660968974233, + "rewards_train/rejected": -1.0410888195037842, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.76285171508789, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -23.27179718017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8325352072715759, + "rewards_train/margins": 0.5071445107460022, + "rewards_train/rejected": -1.3396797180175781, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -1.3829374313354492, + "logps_train/ref_chosen": -0.5, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -13.319406509399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08829374611377716, + "rewards_train/margins": 0.6436469405889511, + "rewards_train/rejected": -0.7319406867027283, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -158.64083862304688, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -180.0006866455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7640838623046875, + "rewards_train/margins": 4.9359846115112305, + "rewards_train/rejected": -8.700068473815918, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -66.84497833251953, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -147.6532440185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5844979286193848, + "rewards_train/margins": 5.930826663970947, + "rewards_train/rejected": -8.515324592590332, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -0.9529659152030945, + "logps_train/ref_chosen": -0.93359375, + "logps_train/ref_rejected": -3.9375, + "logps_train/rejected": -10.640205383300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0019372164970263839, + "rewards_train/margins": 0.6683333456749097, + "rewards_train/rejected": -0.670270562171936, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -83.70935821533203, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -189.75579833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1290641874074936, + "rewards_train/margins": 3.8046439737081528, + "rewards_train/rejected": -3.675579786300659, + "step": 2285 + }, + { + "epoch": 0.64, + "logps_train/chosen": -83.22858428955078, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -98.01321411132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0228585004806519, + "rewards_train/margins": 0.6284629106521606, + "rewards_train/rejected": -1.6513214111328125, + "step": 2285 + }, + { + "epoch": 0.64, + "learning_rate": 3.9798627504159524e-08, + "loss": 0.3944, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -9.66016674041748, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -10.309294700622559, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18476668000221252, + "rewards_train/margins": 0.5242877900600433, + "rewards_train/rejected": -0.7090544700622559, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -10.811845779418945, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -16.386938095092773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.484309583902359, + "rewards_train/margins": 0.5637592971324921, + "rewards_train/rejected": -1.048068881034851, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -29.773927688598633, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -34.61943054199219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0523927211761475, + "rewards_train/margins": -0.4279496669769287, + "rewards_train/rejected": -1.6244430541992188, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.621230125427246, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -1.875, + "logps_train/rejected": -33.421653747558594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31837302446365356, + "rewards_train/margins": 2.8362924456596375, + "rewards_train/rejected": -3.154665470123291, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -6.240273952484131, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -12.44002914428711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42090240120887756, + "rewards_train/margins": 0.24185052514076233, + "rewards_train/rejected": -0.6627529263496399, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -117.13069152832031, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -151.5281982421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2630691528320312, + "rewards_train/margins": 1.3897507190704346, + "rewards_train/rejected": -2.652819871902466, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -138.90850830078125, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -232.74493408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.590850830078125, + "rewards_train/margins": 5.483642578125, + "rewards_train/rejected": -9.074493408203125, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -179.65774536132812, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -208.9213409423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.4657745361328125, + "rewards_train/margins": 0.5263595581054688, + "rewards_train/rejected": -6.992134094238281, + "step": 2286 + }, + { + "epoch": 0.64, + "logps_train/chosen": -168.3001708984375, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -216.09396362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.58001708984375, + "rewards_train/margins": 0.9293794631958008, + "rewards_train/rejected": -6.509396553039551, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -8.561030387878418, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -25.555038452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.38422805070877075, + "rewards_train/margins": 1.9712757468223572, + "rewards_train/rejected": -2.355503797531128, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -218.84059143066406, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -219.66036987304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4840590953826904, + "rewards_train/margins": 0.4819779396057129, + "rewards_train/rejected": -3.9660370349884033, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -103.80410766601562, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -227.97079467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7304108142852783, + "rewards_train/margins": 4.666668653488159, + "rewards_train/rejected": -6.3970794677734375, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -1.8711144924163818, + "logps_train/ref_chosen": -0.80078125, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -13.3417329788208, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10703332722187042, + "rewards_train/margins": 0.033389970660209656, + "rewards_train/rejected": -0.14042329788208008, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -233.14877319335938, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -242.31552124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.514877319335938, + "rewards_train/margins": 0.7166748046875, + "rewards_train/rejected": -11.231552124023438, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -23.355680465698242, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -36.977806091308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3355680704116821, + "rewards_train/margins": 0.48721253871917725, + "rewards_train/rejected": -1.8227806091308594, + "step": 2287 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.335132598876953, + "logps_train/ref_chosen": -3.703125, + "logps_train/ref_rejected": -3.671875, + "logps_train/rejected": -15.930143356323242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6632007360458374, + "rewards_train/margins": -0.4373738765716553, + "rewards_train/rejected": -1.2258268594741821, + "step": 2287 + }, + { + "epoch": 0.64, + "learning_rate": 3.906306307488771e-08, + "loss": 0.4134, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -206.82278442382812, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -170.0, + "logps_train/rejected": -233.6716766357422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3822784423828125, + "rewards_train/margins": 1.9848895072937012, + "rewards_train/rejected": -6.367167949676514, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -159.75262451171875, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -158.56149291992188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.975262463092804, + "rewards_train/margins": -0.11911314725875854, + "rewards_train/rejected": -0.8561493158340454, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -115.74700927734375, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -231.02764892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.274701118469238, + "rewards_train/margins": 7.378064155578613, + "rewards_train/rejected": -11.652765274047852, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -10.001354217529297, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -15.282654762268066, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24701042473316193, + "rewards_train/margins": 0.10000504553318024, + "rewards_train/rejected": -0.34701547026634216, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -3.2487287521362305, + "logps_train/ref_chosen": -6.15625, + "logps_train/ref_rejected": -0.98828125, + "logps_train/rejected": -1.9648902416229248, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2907521426677704, + "rewards_train/margins": 0.38841304183006287, + "rewards_train/rejected": -0.09766089916229248, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -103.24185180664062, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -102.7841567993164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9241852164268494, + "rewards_train/margins": -0.04576951265335083, + "rewards_train/rejected": -0.8784157037734985, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -177.96319580078125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -164.5043487548828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.196319580078125, + "rewards_train/margins": -0.2958846092224121, + "rewards_train/rejected": -3.900434970855713, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -104.86573028564453, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -68.5, + "logps_train/rejected": -95.92192840576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.536573052406311, + "rewards_train/margins": 1.2056199312210083, + "rewards_train/rejected": -2.7421929836273193, + "step": 2288 + }, + { + "epoch": 0.64, + "logps_train/chosen": -5.566025257110596, + "logps_train/ref_chosen": -1.0390625, + "logps_train/ref_rejected": -1.0390625, + "logps_train/rejected": -5.36833381652832, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.452696293592453, + "rewards_train/margins": -0.01976916193962097, + "rewards_train/rejected": -0.43292713165283203, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -159.8646697998047, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -147.40826416015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.986466884613037, + "rewards_train/margins": 0.5543594360351562, + "rewards_train/rejected": -6.540826320648193, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -61.523441314697266, + "logps_train/ref_chosen": -31.125, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -75.27735900878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.039844274520874, + "rewards_train/margins": 1.3253915309906006, + "rewards_train/rejected": -4.365235805511475, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -152.00588989257812, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -201.75885009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.600589275360107, + "rewards_train/margins": 2.875295639038086, + "rewards_train/rejected": -7.475884914398193, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -214.03512573242188, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -232.21456909179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.103512763977051, + "rewards_train/margins": 2.7179441452026367, + "rewards_train/rejected": -7.8214569091796875, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -15.4827880859375, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -63.74808120727539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.842028796672821, + "rewards_train/margins": 3.2077792286872864, + "rewards_train/rejected": -4.049808025360107, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -10.472089767456055, + "logps_train/ref_chosen": -8.25, + "logps_train/ref_rejected": -15.75, + "logps_train/rejected": -52.949806213378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22220897674560547, + "rewards_train/margins": 3.497771739959717, + "rewards_train/rejected": -3.7199807167053223, + "step": 2289 + }, + { + "epoch": 0.64, + "logps_train/chosen": -5.085808753967285, + "logps_train/ref_chosen": -3.6875, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -28.758100509643555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13983087241649628, + "rewards_train/margins": 1.754729226231575, + "rewards_train/rejected": -1.8945600986480713, + "step": 2289 + }, + { + "epoch": 0.64, + "learning_rate": 3.833422418534959e-08, + "loss": 0.3511, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -26.88348388671875, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -7.8125, + "logps_train/rejected": -46.76448440551758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.644598364830017, + "rewards_train/margins": 2.250600218772888, + "rewards_train/rejected": -3.8951985836029053, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -165.89938354492188, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -169.11050415039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6899383068084717, + "rewards_train/margins": 3.0711123943328857, + "rewards_train/rejected": -5.761050701141357, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -159.22854614257812, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -174.7804718017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7228546142578125, + "rewards_train/margins": 1.3551926612854004, + "rewards_train/rejected": -5.078047275543213, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -46.82309341430664, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -46.457088470458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.966684341430664, + "rewards_train/margins": 0.06964969635009766, + "rewards_train/rejected": -4.036334037780762, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -38.35841369628906, + "logps_train/ref_chosen": -25.625, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -89.35043334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.273341417312622, + "rewards_train/margins": 1.9867019653320312, + "rewards_train/rejected": -3.2600433826446533, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -208.50967407226562, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -237.15786743164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.450967311859131, + "rewards_train/margins": 1.764819622039795, + "rewards_train/rejected": -7.215786933898926, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.72539710998535, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -39.97602462768555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.294414758682251, + "rewards_train/margins": 0.8406877517700195, + "rewards_train/rejected": -2.1351025104522705, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -127.9786376953125, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -198.32452392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.897863745689392, + "rewards_train/margins": 5.2345887422561646, + "rewards_train/rejected": -7.132452487945557, + "step": 2290 + }, + { + "epoch": 0.64, + "logps_train/chosen": -27.772342681884766, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -21.029695510864258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3647342622280121, + "rewards_train/margins": 0.7069852650165558, + "rewards_train/rejected": -1.0717195272445679, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -43.269309997558594, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -56.127132415771484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0644309520721436, + "rewards_train/margins": -0.1267176866531372, + "rewards_train/rejected": -1.9377132654190063, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -144.01007080078125, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -120.02252960205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6510071754455566, + "rewards_train/margins": 0.20124578475952148, + "rewards_train/rejected": -2.852252960205078, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -94.66447448730469, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -91.41162109375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8664474487304688, + "rewards_train/margins": -0.025285303592681885, + "rewards_train/rejected": -0.8411621451377869, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -118.66677856445312, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -131.70272827148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.666677951812744, + "rewards_train/margins": 1.2535948753356934, + "rewards_train/rejected": -3.9202728271484375, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -146.0696258544922, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -197.67550659179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.806962490081787, + "rewards_train/margins": 4.310588359832764, + "rewards_train/rejected": -10.11755084991455, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -155.6917724609375, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -172.2374267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.71917724609375, + "rewards_train/margins": 5.0545654296875, + "rewards_train/rejected": -8.77374267578125, + "step": 2291 + }, + { + "epoch": 0.64, + "logps_train/chosen": -263.7662658691406, + "logps_train/ref_chosen": -191.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -169.35812377929688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.2766265869140625, + "rewards_train/margins": -0.8408141136169434, + "rewards_train/rejected": -6.435812473297119, + "step": 2291 + }, + { + "epoch": 0.64, + "learning_rate": 3.761211593664493e-08, + "loss": 0.3512, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -42.38468933105469, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -43.5768928527832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.800968885421753, + "rewards_train/margins": 1.1223454475402832, + "rewards_train/rejected": -3.923314332962036, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -124.01215362548828, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -235.38070678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.301215410232544, + "rewards_train/margins": 7.636855840682983, + "rewards_train/rejected": -9.938071250915527, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -1.4882522821426392, + "logps_train/ref_chosen": -1.5390625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -45.57495880126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0050810216926038265, + "rewards_train/margins": 3.6000769971869886, + "rewards_train/rejected": -3.5949959754943848, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -180.96798706054688, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -243.42051696777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.096798896789551, + "rewards_train/margins": 1.545252799987793, + "rewards_train/rejected": -6.642051696777344, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -80.94229888916016, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -161.53546142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5942298769950867, + "rewards_train/margins": 4.15931636095047, + "rewards_train/rejected": -4.753546237945557, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -130.2222900390625, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -204.590087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4222290515899658, + "rewards_train/margins": 3.336779832839966, + "rewards_train/rejected": -4.759008884429932, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -18.730716705322266, + "logps_train/ref_chosen": -14.375, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -15.589373588562012, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.43557167053222656, + "rewards_train/margins": -0.08288431167602539, + "rewards_train/rejected": -0.35268735885620117, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -92.41218566894531, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -123.12205505371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.291218638420105, + "rewards_train/margins": 4.520987153053284, + "rewards_train/rejected": -5.812205791473389, + "step": 2292 + }, + { + "epoch": 0.64, + "logps_train/chosen": -39.93565368652344, + "logps_train/ref_chosen": -21.0, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -28.691478729248047, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8935654163360596, + "rewards_train/margins": -0.018167495727539062, + "rewards_train/rejected": -1.8753979206085205, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -16.887279510498047, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -46.70704650878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4371654987335205, + "rewards_train/margins": 2.611664056777954, + "rewards_train/rejected": -4.048829555511475, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.65957260131836, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -44.33265686035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3159573078155518, + "rewards_train/margins": 0.5923084020614624, + "rewards_train/rejected": -1.9082657098770142, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -110.84335327148438, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -111.13272094726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8843353390693665, + "rewards_train/margins": 0.02893674373626709, + "rewards_train/rejected": -0.9132720828056335, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -40.50181579589844, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -52.875579833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1939315795898438, + "rewards_train/margins": 0.19362640380859375, + "rewards_train/rejected": -3.3875579833984375, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -35.839073181152344, + "logps_train/ref_chosen": -6.6875, + "logps_train/ref_rejected": -6.5625, + "logps_train/rejected": -51.86863327026367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9151573181152344, + "rewards_train/margins": 1.6154561042785645, + "rewards_train/rejected": -4.530613422393799, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -71.79308319091797, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -62.1811637878418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5293083190917969, + "rewards_train/margins": 1.2138080596923828, + "rewards_train/rejected": -1.7431163787841797, + "step": 2293 + }, + { + "epoch": 0.64, + "logps_train/chosen": -30.243061065673828, + "logps_train/ref_chosen": -18.75, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -52.17224884033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1493061780929565, + "rewards_train/margins": 2.7616688013076782, + "rewards_train/rejected": -3.9109749794006348, + "step": 2293 + }, + { + "epoch": 0.64, + "learning_rate": 3.68967433827656e-08, + "loss": 0.2685, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -121.5009765625, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -119.86549377441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8000977039337158, + "rewards_train/margins": 0.08645164966583252, + "rewards_train/rejected": -1.8865493535995483, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -191.23056030273438, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -228.974365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.923056125640869, + "rewards_train/margins": 1.8743805885314941, + "rewards_train/rejected": -8.797436714172363, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.85052490234375, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -33.56904983520508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.122552514076233, + "rewards_train/margins": 1.1156026124954224, + "rewards_train/rejected": -2.2381551265716553, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -26.86717987060547, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -22.90142059326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6992179751396179, + "rewards_train/margins": 0.8940491080284119, + "rewards_train/rejected": -1.5932670831680298, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -75.42353057861328, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -102.60628509521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6423530578613281, + "rewards_train/margins": 1.218275547027588, + "rewards_train/rejected": -2.860628604888916, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -93.96977233886719, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -186.3214111328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7969772815704346, + "rewards_train/margins": 6.085163831710815, + "rewards_train/rejected": -8.88214111328125, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -62.61897277832031, + "logps_train/ref_chosen": -51.75, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -74.57290649414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0868972539901733, + "rewards_train/margins": 5.145393490791321, + "rewards_train/rejected": -6.232290744781494, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -51.444393157958984, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -119.54110717773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36943933367729187, + "rewards_train/margins": 2.1846714317798615, + "rewards_train/rejected": -2.5541107654571533, + "step": 2294 + }, + { + "epoch": 0.64, + "logps_train/chosen": -33.29060363769531, + "logps_train/ref_chosen": -20.25, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -52.63301086425781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3040603399276733, + "rewards_train/margins": -0.8407592475414276, + "rewards_train/rejected": -0.4633010923862457, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.368295669555664, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -96.4330825805664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8805795907974243, + "rewards_train/margins": -0.8372713327407837, + "rewards_train/rejected": -0.043308258056640625, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -50.56193923950195, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -35.30106735229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8061939477920532, + "rewards_train/margins": 2.239537835121155, + "rewards_train/rejected": -3.045731782913208, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.017143249511719, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -16.541940689086914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23921433091163635, + "rewards_train/margins": 0.43372973799705505, + "rewards_train/rejected": -0.6729440689086914, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -85.27164459228516, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -238.8671112060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7771644592285156, + "rewards_train/margins": 10.859546661376953, + "rewards_train/rejected": -12.636711120605469, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.435771942138672, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -130.6344757080078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.452952265739441, + "rewards_train/margins": 3.8604952096939087, + "rewards_train/rejected": -5.31344747543335, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -22.772022247314453, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -6.71875, + "logps_train/rejected": -37.79365921020508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1459522247314453, + "rewards_train/margins": 1.9615387916564941, + "rewards_train/rejected": -3.1074910163879395, + "step": 2295 + }, + { + "epoch": 0.64, + "logps_train/chosen": -15.941202163696289, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -4.686363220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.043379783630371094, + "rewards_train/margins": 0.046391105744987726, + "rewards_train/rejected": -0.0030113221146166325, + "step": 2295 + }, + { + "epoch": 0.64, + "learning_rate": 3.618811153056134e-08, + "loss": 0.351, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -60.1962890625, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -42.767433166503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.044628858566284, + "rewards_train/margins": -0.6803854703903198, + "rewards_train/rejected": -1.3642433881759644, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -28.864591598510742, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -17.133811950683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7989591956138611, + "rewards_train/margins": 0.423797070980072, + "rewards_train/rejected": -1.222756266593933, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -37.6269645690918, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -41.15169143676758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.084571599960327, + "rewards_train/margins": 0.5243475437164307, + "rewards_train/rejected": -3.608919143676758, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -174.344970703125, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -169.46798706054688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.034497261047363, + "rewards_train/margins": -1.5876984596252441, + "rewards_train/rejected": -2.446798801422119, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -260.1370849609375, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -223.02464294433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.11370849609375, + "rewards_train/margins": 0.7387561798095703, + "rewards_train/rejected": -9.85246467590332, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -177.75062561035156, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -168.0673828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.475062847137451, + "rewards_train/margins": -0.31832456588745117, + "rewards_train/rejected": -6.15673828125, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -39.966941833496094, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -33.71205520629883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0091941356658936, + "rewards_train/margins": 0.49326133728027344, + "rewards_train/rejected": -2.502455472946167, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -87.9891357421875, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -104.4891357421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.498913526535034, + "rewards_train/margins": -0.2999999523162842, + "rewards_train/rejected": -2.19891357421875, + "step": 2296 + }, + { + "epoch": 0.64, + "logps_train/chosen": -7.182581901550293, + "logps_train/ref_chosen": -1.484375, + "logps_train/ref_rejected": -4.65625, + "logps_train/rejected": -20.108911514282227, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5698207020759583, + "rewards_train/margins": 0.9754454493522644, + "rewards_train/rejected": -1.5452661514282227, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.647794723510742, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -28.167131423950195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22727946937084198, + "rewards_train/margins": 1.2331836968660355, + "rewards_train/rejected": -1.4604631662368774, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -63.71204376220703, + "logps_train/ref_chosen": -23.0, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -62.116600036621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.071204662322998, + "rewards_train/margins": -0.00954437255859375, + "rewards_train/rejected": -4.061660289764404, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -21.468475341796875, + "logps_train/ref_chosen": -3.40625, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -44.61703109741211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8062225580215454, + "rewards_train/margins": 0.7929805517196655, + "rewards_train/rejected": -2.599203109741211, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -17.235858917236328, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -6.75, + "logps_train/rejected": -47.779911041259766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6798359155654907, + "rewards_train/margins": 3.423155188560486, + "rewards_train/rejected": -4.102991104125977, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.448123931884766, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -45.21901321411133, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.48231241106987, + "rewards_train/margins": -0.33541108667850494, + "rewards_train/rejected": -0.14690132439136505, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.294366836547852, + "logps_train/ref_chosen": -1.875, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -25.610403060913086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9419366717338562, + "rewards_train/margins": 0.9222286343574524, + "rewards_train/rejected": -1.8641653060913086, + "step": 2297 + }, + { + "epoch": 0.64, + "logps_train/chosen": -113.8942642211914, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -148.55140686035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6394264698028564, + "rewards_train/margins": 1.9157142639160156, + "rewards_train/rejected": -3.555140733718872, + "step": 2297 + }, + { + "epoch": 0.64, + "learning_rate": 3.548622533970358e-08, + "loss": 0.5903, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -181.331787109375, + "logps_train/ref_chosen": -166.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -121.36653137207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.533178687095642, + "rewards_train/margins": 1.5034745931625366, + "rewards_train/rejected": -3.0366532802581787, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -22.267683029174805, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -36.925025939941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9377058744430542, + "rewards_train/margins": 0.11729681491851807, + "rewards_train/rejected": -2.0550026893615723, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -114.84700012207031, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -185.9530029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.2846999168396, + "rewards_train/margins": 2.9606003761291504, + "rewards_train/rejected": -7.24530029296875, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -181.72476196289062, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -203.43023681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4724762439727783, + "rewards_train/margins": 1.9705474376678467, + "rewards_train/rejected": -5.443023681640625, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -95.59550476074219, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -120.02035522460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7595504522323608, + "rewards_train/margins": 1.5924850702285767, + "rewards_train/rejected": -3.3520355224609375, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -82.30026245117188, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -75.0, + "logps_train/rejected": -82.54173278808594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7300262451171875, + "rewards_train/margins": 0.02414703369140625, + "rewards_train/rejected": -0.7541732788085938, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -114.29379272460938, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -182.6382598876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.079379558563232, + "rewards_train/margins": 2.7844467163085938, + "rewards_train/rejected": -6.863826274871826, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.95417594909668, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -51.61138916015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8563551306724548, + "rewards_train/margins": 0.2547838091850281, + "rewards_train/rejected": -1.111138939857483, + "step": 2298 + }, + { + "epoch": 0.64, + "logps_train/chosen": -14.974449157714844, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -18.326017379760742, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4536949098110199, + "rewards_train/margins": -0.7710931897163391, + "rewards_train/rejected": 0.3173982799053192, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -146.2733154296875, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -228.57574462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.527331590652466, + "rewards_train/margins": 5.6302430629730225, + "rewards_train/rejected": -9.157574653625488, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -16.49398422241211, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -3.515625, + "logps_train/rejected": -12.749992370605469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.968148410320282, + "rewards_train/margins": -0.0447116494178772, + "rewards_train/rejected": -0.9234367609024048, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -61.04499435424805, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -61.87303924560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.27949944138526917, + "rewards_train/margins": -0.04219551384449005, + "rewards_train/rejected": -0.2373039275407791, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -46.104736328125, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -47.04045104980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6104736328125, + "rewards_train/margins": 0.0935714840888977, + "rewards_train/rejected": -0.7040451169013977, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.644238471984863, + "logps_train/ref_chosen": -7.4375, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -19.87198829650879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42067384719848633, + "rewards_train/margins": 1.2774624824523926, + "rewards_train/rejected": -1.698136329650879, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -13.128250122070312, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -39.28028869628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9472000002861023, + "rewards_train/margins": 2.477703869342804, + "rewards_train/rejected": -3.4249038696289062, + "step": 2299 + }, + { + "epoch": 0.64, + "logps_train/chosen": -276.65625, + "logps_train/ref_chosen": -183.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -257.78460693359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.365625381469727, + "rewards_train/margins": 0.812835693359375, + "rewards_train/rejected": -10.178461074829102, + "step": 2299 + }, + { + "epoch": 0.64, + "learning_rate": 3.4791089722651435e-08, + "loss": 0.4025, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -30.898347854614258, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -29.98073959350586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0398348569869995, + "rewards_train/margins": 0.6644891500473022, + "rewards_train/rejected": -1.7043240070343018, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -40.776214599609375, + "logps_train/ref_chosen": -31.375, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -51.651878356933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9401214718818665, + "rewards_train/margins": 3.428191363811493, + "rewards_train/rejected": -4.368312835693359, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -50.97844696044922, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -55.638824462890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.6165947914123535, + "rewards_train/margins": 0.15353775024414062, + "rewards_train/rejected": -4.770132541656494, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -48.29137420654297, + "logps_train/ref_chosen": -27.875, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -123.60123443603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.041637420654297, + "rewards_train/margins": 1.568485975265503, + "rewards_train/rejected": -3.6101233959198, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -157.71054077148438, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -199.76744079589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4710540771484375, + "rewards_train/margins": 0.8056902885437012, + "rewards_train/rejected": -5.276744365692139, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -135.75265502929688, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -190.4993896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.375265598297119, + "rewards_train/margins": 2.374673366546631, + "rewards_train/rejected": -6.74993896484375, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -262.211181640625, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -213.56924438476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.92111873626709, + "rewards_train/margins": -1.164194107055664, + "rewards_train/rejected": -7.756924629211426, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -3.9717395305633545, + "logps_train/ref_chosen": -3.25, + "logps_train/ref_rejected": -4.96875, + "logps_train/rejected": -5.085824966430664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07217395305633545, + "rewards_train/margins": -0.06046645622700453, + "rewards_train/rejected": -0.011707496829330921, + "step": 2300 + }, + { + "epoch": 0.64, + "logps_train/chosen": -45.725372314453125, + "logps_train/ref_chosen": -32.5, + "logps_train/ref_rejected": -57.0, + "logps_train/rejected": -110.86382293701172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3225373029708862, + "rewards_train/margins": 4.063845276832581, + "rewards_train/rejected": -5.386382579803467, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -33.3843994140625, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -31.982872009277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.250939965248108, + "rewards_train/margins": 1.5285972356796265, + "rewards_train/rejected": -2.7795372009277344, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -99.74160766601562, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -195.68017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.524160861968994, + "rewards_train/margins": 5.843857288360596, + "rewards_train/rejected": -9.36801815032959, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -11.620063781738281, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -41.76640319824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.608881413936615, + "rewards_train/margins": 2.6052590012550354, + "rewards_train/rejected": -3.2141404151916504, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -31.64637565612793, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -63.724342346191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.227137565612793, + "rewards_train/margins": 2.832796573638916, + "rewards_train/rejected": -4.059934139251709, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -96.74449920654297, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -115.31422424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5244500637054443, + "rewards_train/margins": 3.331972360610962, + "rewards_train/rejected": -6.856422424316406, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -15.123767852783203, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -29.767080307006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2139393091201782, + "rewards_train/margins": 1.3158937692642212, + "rewards_train/rejected": -2.5298330783843994, + "step": 2301 + }, + { + "epoch": 0.64, + "logps_train/chosen": -47.724761962890625, + "logps_train/ref_chosen": -54.0, + "logps_train/ref_rejected": -54.0, + "logps_train/rejected": -47.06584548950195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.6275238394737244, + "rewards_train/margins": -0.06589162349700928, + "rewards_train/rejected": 0.6934154629707336, + "step": 2301 + }, + { + "epoch": 0.64, + "learning_rate": 3.4102709544617245e-08, + "loss": 0.3259, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -100.25347900390625, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -140.31759643554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.525347948074341, + "rewards_train/margins": 3.40641188621521, + "rewards_train/rejected": -6.931759834289551, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -9.974150657653809, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -16.309648513793945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.17866507172584534, + "rewards_train/margins": 0.027299776673316956, + "rewards_train/rejected": -0.2059648483991623, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -249.24147033691406, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -234.82388305664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.124147415161133, + "rewards_train/margins": -0.5417585372924805, + "rewards_train/rejected": -11.582388877868652, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -7.6230926513671875, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -10.136677742004395, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10019073635339737, + "rewards_train/margins": 0.15135851129889488, + "rewards_train/rejected": -0.05116777494549751, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -38.05402374267578, + "logps_train/ref_chosen": -30.5, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -26.390920639038086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7554023861885071, + "rewards_train/margins": 0.14618968963623047, + "rewards_train/rejected": -0.9015920758247375, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -32.619083404541016, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -56.255821228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.882220983505249, + "rewards_train/margins": 1.212111234664917, + "rewards_train/rejected": -4.094332218170166, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -24.68952178955078, + "logps_train/ref_chosen": -3.5625, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -20.94936180114746, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1127021312713623, + "rewards_train/margins": -1.2115159630775452, + "rewards_train/rejected": -0.9011861681938171, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -3.7827882766723633, + "logps_train/ref_chosen": -0.64453125, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -16.357364654541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31382569670677185, + "rewards_train/margins": 0.6969107687473297, + "rewards_train/rejected": -1.0107364654541016, + "step": 2302 + }, + { + "epoch": 0.64, + "logps_train/chosen": -125.56565856933594, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -177.2008514404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2434341460466385, + "rewards_train/margins": 0.7635193020105362, + "rewards_train/rejected": -0.5200851559638977, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -23.022558212280273, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -25.39175033569336, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6460058689117432, + "rewards_train/margins": -0.6380808353424072, + "rewards_train/rejected": -1.007925033569336, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -26.308475494384766, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -35.715179443359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2449100017547607, + "rewards_train/margins": -0.5358920097351074, + "rewards_train/rejected": -1.7090179920196533, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -150.14157104492188, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -192.58554077148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6141570806503296, + "rewards_train/margins": 2.4443970918655396, + "rewards_train/rejected": -4.058554172515869, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -29.81951141357422, + "logps_train/ref_chosen": -22.875, + "logps_train/ref_rejected": -29.75, + "logps_train/rejected": -31.109994888305664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6944511532783508, + "rewards_train/margins": -0.5584516674280167, + "rewards_train/rejected": -0.13599948585033417, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -40.49465560913086, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -35.45952606201172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.924465537071228, + "rewards_train/margins": -0.5160129070281982, + "rewards_train/rejected": -1.4084526300430298, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -199.06179809570312, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -161.62777709960938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.406180381774902, + "rewards_train/margins": -1.7434024810791016, + "rewards_train/rejected": -6.662777900695801, + "step": 2303 + }, + { + "epoch": 0.64, + "logps_train/chosen": -77.66818237304688, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -84.57768249511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8168182373046875, + "rewards_train/margins": 0.19095003604888916, + "rewards_train/rejected": -1.0077682733535767, + "step": 2303 + }, + { + "epoch": 0.64, + "learning_rate": 3.3421089623532297e-08, + "loss": 0.7573, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -54.11960983276367, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -19.54521942138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2869609594345093, + "rewards_train/margins": 0.1956859827041626, + "rewards_train/rejected": -1.4826469421386719, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -93.24197387695312, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -83.43702697753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3991973400115967, + "rewards_train/margins": 1.144505262374878, + "rewards_train/rejected": -4.543702602386475, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -7.580404758453369, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -20.776823043823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3919595181941986, + "rewards_train/margins": 2.038391798734665, + "rewards_train/rejected": -1.6464322805404663, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -154.71139526367188, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -184.51919555664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3711395263671875, + "rewards_train/margins": 2.28078031539917, + "rewards_train/rejected": -5.651919841766357, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -99.74554443359375, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -98.94523620605469, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2745544910430908, + "rewards_train/margins": -0.5300308465957642, + "rewards_train/rejected": -0.7445236444473267, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -179.81585693359375, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -244.39752197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.331585884094238, + "rewards_train/margins": 4.208166122436523, + "rewards_train/rejected": -9.539752006530762, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -108.11723327636719, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -160.5958251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1617233753204346, + "rewards_train/margins": 4.697859048843384, + "rewards_train/rejected": -5.859582424163818, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -41.596893310546875, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -48.84248352050781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5846893787384033, + "rewards_train/margins": -0.16294097900390625, + "rewards_train/rejected": -2.421748399734497, + "step": 2304 + }, + { + "epoch": 0.64, + "logps_train/chosen": -24.4093017578125, + "logps_train/ref_chosen": -13.5625, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -42.385276794433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.084680199623108, + "rewards_train/margins": 1.9913474321365356, + "rewards_train/rejected": -3.0760276317596436, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -53.95840835571289, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -49.3148193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6583409309387207, + "rewards_train/margins": 0.9747037887573242, + "rewards_train/rejected": -4.633044719696045, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -44.664981842041016, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -52.378150939941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.972748279571533, + "rewards_train/margins": 1.1025667190551758, + "rewards_train/rejected": -4.075314998626709, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -9.856069564819336, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -1.75, + "logps_train/rejected": -17.073936462402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24498195946216583, + "rewards_train/margins": 1.2874117344617844, + "rewards_train/rejected": -1.5323936939239502, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -19.32361602783203, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -52.8651123046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5386115908622742, + "rewards_train/margins": 0.9228996634483337, + "rewards_train/rejected": -1.461511254310608, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -61.3009147644043, + "logps_train/ref_chosen": -53.25, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -171.32733154296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8050915002822876, + "rewards_train/margins": 2.8276416063308716, + "rewards_train/rejected": -3.632733106613159, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -22.832313537597656, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -7.3125, + "logps_train/rejected": -22.801456451416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4269813299179077, + "rewards_train/margins": 0.12191438674926758, + "rewards_train/rejected": -1.5488957166671753, + "step": 2305 + }, + { + "epoch": 0.64, + "logps_train/chosen": -138.0037078857422, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -256.91058349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.000370740890503, + "rewards_train/margins": 7.490687608718872, + "rewards_train/rejected": -10.491058349609375, + "step": 2305 + }, + { + "epoch": 0.64, + "learning_rate": 3.274623473001381e-08, + "loss": 0.3061, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -20.329431533813477, + "logps_train/ref_chosen": -3.84375, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -31.535247802734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6485681533813477, + "rewards_train/margins": 0.654956579208374, + "rewards_train/rejected": -2.3035247325897217, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -108.99915313720703, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -166.84580993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1999154090881348, + "rewards_train/margins": 3.3346657752990723, + "rewards_train/rejected": -5.534581184387207, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -265.6212158203125, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -254.3726348876953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.662121772766113, + "rewards_train/margins": -0.9248580932617188, + "rewards_train/rejected": -11.737263679504395, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -28.38709259033203, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -37.900291442871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4637092649936676, + "rewards_train/margins": 1.1638198792934418, + "rewards_train/rejected": -1.6275291442871094, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -27.959640502929688, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -66.15553283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6897140741348267, + "rewards_train/margins": 1.500839352607727, + "rewards_train/rejected": -3.1905534267425537, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -161.4617919921875, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -232.34356689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.946179151535034, + "rewards_train/margins": 4.788177728652954, + "rewards_train/rejected": -7.734356880187988, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -105.46145629882812, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -118.71955871582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8461456298828125, + "rewards_train/margins": -1.5241897106170654, + "rewards_train/rejected": -2.321955919265747, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -27.178897857666016, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -14.625, + "logps_train/rejected": -31.603851318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4366397857666016, + "rewards_train/margins": 0.26124536991119385, + "rewards_train/rejected": -1.6978851556777954, + "step": 2306 + }, + { + "epoch": 0.64, + "logps_train/chosen": -166.6259307861328, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -215.76300048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.2625932693481445, + "rewards_train/margins": 2.8137073516845703, + "rewards_train/rejected": -9.076300621032715, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -24.123605728149414, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -38.12387466430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0686105489730835, + "rewards_train/margins": 2.096902012825012, + "rewards_train/rejected": -3.1655125617980957, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -200.16615295410156, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -199.92283630371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.566615104675293, + "rewards_train/margins": 0.22566890716552734, + "rewards_train/rejected": -8.79228401184082, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -22.565900802612305, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -23.188928604125977, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7690900564193726, + "rewards_train/margins": -0.7251971960067749, + "rewards_train/rejected": -1.0438928604125977, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -109.2342529296875, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -139.0036163330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7234253287315369, + "rewards_train/margins": 3.1269362568855286, + "rewards_train/rejected": -3.8503615856170654, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -132.8342742919922, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -199.09268188476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01657257042825222, + "rewards_train/margins": 3.5258408542722464, + "rewards_train/rejected": -3.509268283843994, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -18.475971221923828, + "logps_train/ref_chosen": -12.4375, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -26.384740829467773, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6038471460342407, + "rewards_train/margins": 1.0033769607543945, + "rewards_train/rejected": -1.6072241067886353, + "step": 2307 + }, + { + "epoch": 0.64, + "logps_train/chosen": -130.58108520507812, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -140.52735900878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9581085443496704, + "rewards_train/margins": 1.0946274995803833, + "rewards_train/rejected": -3.0527360439300537, + "step": 2307 + }, + { + "epoch": 0.65, + "learning_rate": 3.2078149587330127e-08, + "loss": 0.4401, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -183.86224365234375, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -135.68667602539062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.686224460601807, + "rewards_train/margins": -0.5175566673278809, + "rewards_train/rejected": -4.168667793273926, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -181.99462890625, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -171.21148681640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.849462985992432, + "rewards_train/margins": -1.178314208984375, + "rewards_train/rejected": -4.671148777008057, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -14.755741119384766, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -31.148433685302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0568240880966187, + "rewards_train/margins": 0.5267692804336548, + "rewards_train/rejected": -1.5835933685302734, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -122.34387969970703, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -206.86773681640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7843879461288452, + "rewards_train/margins": 5.202386021614075, + "rewards_train/rejected": -6.98677396774292, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -204.88143920898438, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -251.23028564453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.388144016265869, + "rewards_train/margins": 4.234884738922119, + "rewards_train/rejected": -10.623028755187988, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -196.73956298828125, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -296.9481201171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.323956489562988, + "rewards_train/margins": 6.770855903625488, + "rewards_train/rejected": -14.094812393188477, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -112.00081634521484, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -167.75843811035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.250081539154053, + "rewards_train/margins": -0.17423772811889648, + "rewards_train/rejected": -4.075843811035156, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -6.784698486328125, + "logps_train/ref_chosen": -5.15625, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -22.096107482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16284485161304474, + "rewards_train/margins": 0.9717659205198288, + "rewards_train/rejected": -1.1346107721328735, + "step": 2308 + }, + { + "epoch": 0.65, + "logps_train/chosen": -40.28874969482422, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -25.71368408203125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8788750171661377, + "rewards_train/margins": -0.2262566089630127, + "rewards_train/rejected": -1.652618408203125, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -139.95391845703125, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -143.3172149658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0953919887542725, + "rewards_train/margins": 4.786329507827759, + "rewards_train/rejected": -6.881721496582031, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -14.066827774047852, + "logps_train/ref_chosen": -2.78125, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -21.919017791748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.128557801246643, + "rewards_train/margins": 0.5695940256118774, + "rewards_train/rejected": -1.6981518268585205, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -19.36993408203125, + "logps_train/ref_chosen": -19.5, + "logps_train/ref_rejected": -1.5390625, + "logps_train/rejected": -20.0942440032959, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.01300659216940403, + "rewards_train/margins": 1.8685248140245676, + "rewards_train/rejected": -1.8555182218551636, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -95.46644592285156, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -135.43588256835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7966446280479431, + "rewards_train/margins": 1.4469435811042786, + "rewards_train/rejected": -2.2435882091522217, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.358160018920898, + "logps_train/ref_chosen": -12.1875, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -35.028404235839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11706600338220596, + "rewards_train/margins": 0.27327441424131393, + "rewards_train/rejected": -0.3903404176235199, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -97.10289764404297, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -146.52503967285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1602897644042969, + "rewards_train/margins": 3.142214298248291, + "rewards_train/rejected": -4.302504062652588, + "step": 2309 + }, + { + "epoch": 0.65, + "logps_train/chosen": -46.01873016357422, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -17.75, + "logps_train/rejected": -43.53114700317383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.964372992515564, + "rewards_train/margins": 0.6137417554855347, + "rewards_train/rejected": -2.5781147480010986, + "step": 2309 + }, + { + "epoch": 0.65, + "learning_rate": 3.141683887136892e-08, + "loss": 0.4181, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -6.184999465942383, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -32.058677673339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19974994659423828, + "rewards_train/margins": 2.7436177730560303, + "rewards_train/rejected": -2.9433677196502686, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -78.3731689453125, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -190.8176727294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7623169422149658, + "rewards_train/margins": 5.169450521469116, + "rewards_train/rejected": -6.931767463684082, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -153.4864501953125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -71.5, + "logps_train/rejected": -85.71527099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.04864501953125, + "rewards_train/margins": 0.3728821277618408, + "rewards_train/rejected": -1.4215271472930908, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -123.64482879638672, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -140.39874267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.114482879638672, + "rewards_train/margins": 2.8253912925720215, + "rewards_train/rejected": -5.939874172210693, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -22.66860008239746, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -32.19297409057617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.723110020160675, + "rewards_train/margins": 1.0274373888969421, + "rewards_train/rejected": -1.7505474090576172, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.128364562988281, + "logps_train/ref_chosen": -0.435546875, + "logps_train/ref_rejected": -1.484375, + "logps_train/rejected": -18.83868980407715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2692817449569702, + "rewards_train/margins": 0.46614980697631836, + "rewards_train/rejected": -1.7354315519332886, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -84.05705261230469, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -31.6783504486084, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9057053327560425, + "rewards_train/margins": -2.4378703236579895, + "rewards_train/rejected": 0.532164990901947, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -41.02189636230469, + "logps_train/ref_chosen": -25.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -151.38275146484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5521897077560425, + "rewards_train/margins": 3.7860857248306274, + "rewards_train/rejected": -5.33827543258667, + "step": 2310 + }, + { + "epoch": 0.65, + "logps_train/chosen": -115.5062484741211, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -153.56521606445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6006247997283936, + "rewards_train/margins": 2.5558969974517822, + "rewards_train/rejected": -6.156521797180176, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -101.6883316040039, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -40.927978515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.118833303451538, + "rewards_train/margins": 0.9927146434783936, + "rewards_train/rejected": -3.1115479469299316, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -158.8526611328125, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -165.33480834960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.135266304016113, + "rewards_train/margins": 2.398214340209961, + "rewards_train/rejected": -9.533480644226074, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -32.28633117675781, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -81.87432861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5536330938339233, + "rewards_train/margins": 2.8588000535964966, + "rewards_train/rejected": -4.41243314743042, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -59.60536193847656, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -61.5, + "logps_train/rejected": -93.44916534423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3605361878871918, + "rewards_train/margins": 2.8343802988529205, + "rewards_train/rejected": -3.1949164867401123, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -100.55618286132812, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -78.21037292480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1556183099746704, + "rewards_train/margins": 0.21541905403137207, + "rewards_train/rejected": -1.3710373640060425, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -11.370336532592773, + "logps_train/ref_chosen": -1.734375, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -15.619733810424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9635961651802063, + "rewards_train/margins": 0.020252227783203125, + "rewards_train/rejected": -0.9838483929634094, + "step": 2311 + }, + { + "epoch": 0.65, + "logps_train/chosen": -69.92459869384766, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -97.10554504394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6674599647521973, + "rewards_train/margins": 1.6180944442749023, + "rewards_train/rejected": -5.2855544090271, + "step": 2311 + }, + { + "epoch": 0.65, + "learning_rate": 3.0762307210604243e-08, + "loss": 0.3773, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -38.92338562011719, + "logps_train/ref_chosen": -6.09375, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -39.811126708984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.282963514328003, + "rewards_train/margins": -0.27685070037841797, + "rewards_train/rejected": -3.006112813949585, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -126.71595764160156, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -155.47897338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.121595859527588, + "rewards_train/margins": 1.1763014793395996, + "rewards_train/rejected": -6.2978973388671875, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -91.62750244140625, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -59.5, + "logps_train/rejected": -66.58065795898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5127502679824829, + "rewards_train/margins": 0.19531553983688354, + "rewards_train/rejected": -0.7080658078193665, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -40.15925979614258, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -63.90132141113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6784260272979736, + "rewards_train/margins": 1.5742061138153076, + "rewards_train/rejected": -4.252632141113281, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -32.410152435302734, + "logps_train/ref_chosen": -7.40625, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -48.186317443847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5003902912139893, + "rewards_train/margins": 1.243241548538208, + "rewards_train/rejected": -3.7436318397521973, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -15.374847412109375, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -48.061729431152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6749847531318665, + "rewards_train/margins": 2.393688142299652, + "rewards_train/rejected": -3.0686728954315186, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -7.279866695404053, + "logps_train/ref_chosen": -1.59375, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -24.852439880371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5686116814613342, + "rewards_train/margins": 1.2197573781013489, + "rewards_train/rejected": -1.788369059562683, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -343.13800048828125, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -288.0827331542969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -15.513800621032715, + "rewards_train/margins": -1.3055267333984375, + "rewards_train/rejected": -14.208273887634277, + "step": 2312 + }, + { + "epoch": 0.65, + "logps_train/chosen": -164.26959228515625, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -158.66339111328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.226959228515625, + "rewards_train/margins": -0.36062002182006836, + "rewards_train/rejected": -2.8663392066955566, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -116.84439086914062, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -90.76676940917969, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9844391345977783, + "rewards_train/margins": -1.0077621936798096, + "rewards_train/rejected": -2.9766769409179688, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -23.329605102539062, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -27.462608337402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6923354864120483, + "rewards_train/margins": 0.5664254426956177, + "rewards_train/rejected": -2.258760929107666, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -99.49726867675781, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -171.56065368652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1997268199920654, + "rewards_train/margins": 5.106338739395142, + "rewards_train/rejected": -7.306065559387207, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -11.998234748840332, + "logps_train/ref_chosen": -3.796875, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -27.251794815063477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8201360106468201, + "rewards_train/margins": 0.3925434947013855, + "rewards_train/rejected": -1.2126795053482056, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -91.52561950683594, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -90.05036926269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3525619506835938, + "rewards_train/margins": -0.14752495288848877, + "rewards_train/rejected": -1.205036997795105, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -152.9126434326172, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -209.1743927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8912644386291504, + "rewards_train/margins": 5.376175403594971, + "rewards_train/rejected": -9.267439842224121, + "step": 2313 + }, + { + "epoch": 0.65, + "logps_train/chosen": -86.02812194824219, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -64.5, + "logps_train/rejected": -98.81634521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3528122007846832, + "rewards_train/margins": 3.0788224637508392, + "rewards_train/rejected": -3.4316346645355225, + "step": 2313 + }, + { + "epoch": 0.65, + "learning_rate": 3.0114559186063536e-08, + "loss": 0.5026, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -26.880033493041992, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -25.068313598632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7130033373832703, + "rewards_train/margins": 0.3688279986381531, + "rewards_train/rejected": -1.0818313360214233, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -109.55103302001953, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -115.61970520019531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.905103325843811, + "rewards_train/margins": -0.7931327819824219, + "rewards_train/rejected": -1.1119705438613892, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -82.82005310058594, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -94.944580078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1679946929216385, + "rewards_train/margins": 4.2374527007341385, + "rewards_train/rejected": -4.0694580078125, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -21.33226776123047, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -32.364784240722656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4551018476486206, + "rewards_train/margins": -0.631123423576355, + "rewards_train/rejected": -0.8239784240722656, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -34.71707534790039, + "logps_train/ref_chosen": -30.0, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -43.03938293457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4717075526714325, + "rewards_train/margins": 0.0072307586669921875, + "rewards_train/rejected": -0.4789383113384247, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -7.552461624145508, + "logps_train/ref_chosen": -2.765625, + "logps_train/ref_rejected": -1.3671875, + "logps_train/rejected": -18.9089412689209, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4786836802959442, + "rewards_train/margins": 1.2754917442798615, + "rewards_train/rejected": -1.7541754245758057, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -8.656187057495117, + "logps_train/ref_chosen": -1.7578125, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -30.28876495361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6898374557495117, + "rewards_train/margins": 1.3702890872955322, + "rewards_train/rejected": -2.060126543045044, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -39.35356140136719, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -25.807723999023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4916062355041504, + "rewards_train/margins": -1.7920838594436646, + "rewards_train/rejected": -1.6995223760604858, + "step": 2314 + }, + { + "epoch": 0.65, + "logps_train/chosen": -8.544290542602539, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -39.676082611083984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.048179056495428085, + "rewards_train/margins": 1.8694292046129704, + "rewards_train/rejected": -1.9176082611083984, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -185.2622833251953, + "logps_train/ref_chosen": -132.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -200.66236877441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.326228618621826, + "rewards_train/margins": 2.34000825881958, + "rewards_train/rejected": -7.666236877441406, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -170.37078857421875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -162.0, + "logps_train/rejected": -214.69471740722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.437078952789307, + "rewards_train/margins": 0.832392692565918, + "rewards_train/rejected": -5.269471645355225, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -23.900638580322266, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -40.5952262878418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9525638818740845, + "rewards_train/margins": 1.4913338422775269, + "rewards_train/rejected": -3.4438977241516113, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -166.76925659179688, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -136.59791564941406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9769256114959717, + "rewards_train/margins": -0.6171340942382812, + "rewards_train/rejected": -3.3597915172576904, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -246.8392791748047, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -205.78761291503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.683928489685059, + "rewards_train/margins": 0.04483318328857422, + "rewards_train/rejected": -8.728761672973633, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -35.23651123046875, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -34.7475700378418, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.604901075363159, + "rewards_train/margins": -0.3488941192626953, + "rewards_train/rejected": -2.256006956100464, + "step": 2315 + }, + { + "epoch": 0.65, + "logps_train/chosen": -111.23187255859375, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -142.1573028564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6231873035430908, + "rewards_train/margins": 4.942543268203735, + "rewards_train/rejected": -6.565730571746826, + "step": 2315 + }, + { + "epoch": 0.65, + "learning_rate": 2.947359933129645e-08, + "loss": 0.5802, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -22.81133460998535, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -62.0152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.581133484840393, + "rewards_train/margins": 2.7578924894332886, + "rewards_train/rejected": -4.339025974273682, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -7.476126670837402, + "logps_train/ref_chosen": -2.40625, + "logps_train/ref_rejected": -14.0, + "logps_train/rejected": -26.8140869140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5069876909255981, + "rewards_train/margins": 0.774420976638794, + "rewards_train/rejected": -1.281408667564392, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -33.66339111328125, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -56.047943115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.810089111328125, + "rewards_train/margins": 1.0822052955627441, + "rewards_train/rejected": -3.892294406890869, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -7.834233283996582, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -22.5, + "logps_train/rejected": -53.05785369873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4990483224391937, + "rewards_train/margins": 2.556737095117569, + "rewards_train/rejected": -3.0557854175567627, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -264.973388671875, + "logps_train/ref_chosen": -253.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -255.09744262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1973389387130737, + "rewards_train/margins": 8.712405323982239, + "rewards_train/rejected": -9.909744262695312, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -58.901756286621094, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -45.25, + "logps_train/rejected": -84.52413940429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.440175771713257, + "rewards_train/margins": 1.4872381687164307, + "rewards_train/rejected": -3.9274139404296875, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -138.67385864257812, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -168.03643798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.567386150360107, + "rewards_train/margins": 0.9862575531005859, + "rewards_train/rejected": -5.553643703460693, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -298.57373046875, + "logps_train/ref_chosen": -207.0, + "logps_train/ref_rejected": -204.0, + "logps_train/rejected": -297.254150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.157373428344727, + "rewards_train/margins": 0.16804218292236328, + "rewards_train/rejected": -9.32541561126709, + "step": 2316 + }, + { + "epoch": 0.65, + "logps_train/chosen": -62.776119232177734, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -142.0381317138672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6401119232177734, + "rewards_train/margins": 3.4637012481689453, + "rewards_train/rejected": -7.103813171386719, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -151.10577392578125, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -137.191650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.810577392578125, + "rewards_train/margins": 1.3585877418518066, + "rewards_train/rejected": -4.169165134429932, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -25.316295623779297, + "logps_train/ref_chosen": -7.0625, + "logps_train/ref_rejected": -6.375, + "logps_train/rejected": -41.17074966430664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8253796100616455, + "rewards_train/margins": 1.6541953086853027, + "rewards_train/rejected": -3.4795749187469482, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -17.412567138671875, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -17.0, + "logps_train/rejected": -17.35004425048828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04125671461224556, + "rewards_train/margins": -0.006252288818359375, + "rewards_train/rejected": -0.035004425793886185, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -104.13117980957031, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -158.70079040527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4631179571151733, + "rewards_train/margins": 4.15696108341217, + "rewards_train/rejected": -5.620079040527344, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -17.151899337768555, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -52.607627868652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3120650053024292, + "rewards_train/margins": 1.8736978769302368, + "rewards_train/rejected": -3.185762882232666, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -256.35858154296875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -250.18655395507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -12.735857963562012, + "rewards_train/margins": -0.1172027587890625, + "rewards_train/rejected": -12.61865520477295, + "step": 2317 + }, + { + "epoch": 0.65, + "logps_train/chosen": -21.681339263916016, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -33.50684356689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8368839621543884, + "rewards_train/margins": 1.7981753945350647, + "rewards_train/rejected": -2.635059356689453, + "step": 2317 + }, + { + "epoch": 0.65, + "learning_rate": 2.883943213234219e-08, + "loss": 0.2586, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -77.09854125976562, + "logps_train/ref_chosen": -58.75, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -358.1563720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8348541259765625, + "rewards_train/margins": 14.780782699584961, + "rewards_train/rejected": -16.615636825561523, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -40.3348388671875, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -66.31242370605469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9522340297698975, + "rewards_train/margins": 1.7790086269378662, + "rewards_train/rejected": -4.731242656707764, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -84.38233947753906, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -100.62422180175781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2632339000701904, + "rewards_train/margins": 0.699188232421875, + "rewards_train/rejected": -2.9624221324920654, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -127.63214874267578, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -236.98678588867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3632149696350098, + "rewards_train/margins": 5.735463619232178, + "rewards_train/rejected": -9.098678588867188, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -36.55232238769531, + "logps_train/ref_chosen": -18.5, + "logps_train/ref_rejected": -6.28125, + "logps_train/rejected": -17.69774627685547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.805232286453247, + "rewards_train/margins": -0.6635826826095581, + "rewards_train/rejected": -1.141649603843689, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -191.66119384765625, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -201.34854125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.166119575500488, + "rewards_train/margins": 2.2687344551086426, + "rewards_train/rejected": -6.434854030609131, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -184.73782348632812, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -189.0, + "logps_train/rejected": -208.99612426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7737823724746704, + "rewards_train/margins": 1.225830078125, + "rewards_train/rejected": -1.9996124505996704, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -111.79780578613281, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -97.7080307006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.029780626296997, + "rewards_train/margins": 1.0910224914550781, + "rewards_train/rejected": -3.120803117752075, + "step": 2318 + }, + { + "epoch": 0.65, + "logps_train/chosen": -0.9411418437957764, + "logps_train/ref_chosen": -0.34375, + "logps_train/ref_rejected": -0.34375, + "logps_train/rejected": -1.2223976850509644, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05973918363451958, + "rewards_train/margins": 0.028125587850809097, + "rewards_train/rejected": -0.08786477148532867, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -8.120702743530273, + "logps_train/ref_chosen": -3.125, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -29.90958595275879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4995702803134918, + "rewards_train/margins": 1.3351382911205292, + "rewards_train/rejected": -1.834708571434021, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -123.51477813720703, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -125.56843566894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.151477813720703, + "rewards_train/margins": 0.10536575317382812, + "rewards_train/rejected": -2.2568435668945312, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -293.99676513671875, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -318.6929931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -14.499676704406738, + "rewards_train/margins": 2.169623374938965, + "rewards_train/rejected": -16.669300079345703, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -156.92800903320312, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -230.87606811523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.692800998687744, + "rewards_train/margins": 5.794806003570557, + "rewards_train/rejected": -9.4876070022583, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -73.45909881591797, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -127.10017395019531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7459098696708679, + "rewards_train/margins": 1.1641075015068054, + "rewards_train/rejected": -1.9100173711776733, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -26.859363555908203, + "logps_train/ref_chosen": -8.6875, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -33.790218353271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8171863555908203, + "rewards_train/margins": 0.8087105751037598, + "rewards_train/rejected": -2.62589693069458, + "step": 2319 + }, + { + "epoch": 0.65, + "logps_train/chosen": -40.173248291015625, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -35.03177261352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3798248767852783, + "rewards_train/margins": 0.8108525276184082, + "rewards_train/rejected": -3.1906774044036865, + "step": 2319 + }, + { + "epoch": 0.65, + "learning_rate": 2.821206202769899e-08, + "loss": 0.3099, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -71.10448455810547, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -56.5, + "logps_train/rejected": -71.16780853271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4604485034942627, + "rewards_train/margins": 0.0063323974609375, + "rewards_train/rejected": -1.4667809009552002, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -119.52110290527344, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -126.27786254882812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7021102905273438, + "rewards_train/margins": 1.5756759643554688, + "rewards_train/rejected": -2.2777862548828125, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -69.48645782470703, + "logps_train/ref_chosen": -46.75, + "logps_train/ref_rejected": -46.75, + "logps_train/rejected": -69.13172912597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2736458778381348, + "rewards_train/margins": -0.035472869873046875, + "rewards_train/rejected": -2.238173007965088, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -162.2604217529297, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -257.68646240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.026042461395264, + "rewards_train/margins": 8.442603588104248, + "rewards_train/rejected": -13.468646049499512, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -5.343206405639648, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -19.139944076538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10942935943603516, + "rewards_train/margins": 1.5359238386154175, + "rewards_train/rejected": -1.4264944791793823, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -114.529541015625, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -136.9136962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.752954125404358, + "rewards_train/margins": 3.6384156942367554, + "rewards_train/rejected": -5.391369819641113, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -117.68069458007812, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -32.192604064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8180694580078125, + "rewards_train/margins": 0.8511909246444702, + "rewards_train/rejected": -1.6692603826522827, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -31.196346282958984, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -37.87012481689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9446346759796143, + "rewards_train/margins": 0.12987780570983887, + "rewards_train/rejected": -2.074512481689453, + "step": 2320 + }, + { + "epoch": 0.65, + "logps_train/chosen": -135.02850341796875, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -190.26084899902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.052850365638733, + "rewards_train/margins": 2.3732346296310425, + "rewards_train/rejected": -3.4260849952697754, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -81.7980728149414, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -157.34754943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0798072814941406, + "rewards_train/margins": 4.9549479484558105, + "rewards_train/rejected": -7.034755229949951, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -138.32652282714844, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -260.4580383300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3826522827148438, + "rewards_train/margins": 9.063151359558105, + "rewards_train/rejected": -11.44580364227295, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -137.18592834472656, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -70.25048828125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.743592739105225, + "rewards_train/margins": -3.1935439109802246, + "rewards_train/rejected": -4.550048828125, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -109.79984283447266, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -111.07829284667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.079984188079834, + "rewards_train/margins": 0.12784528732299805, + "rewards_train/rejected": -4.207829475402832, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -187.3194580078125, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -158.70339965820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.031945705413818, + "rewards_train/margins": 0.6383943557739258, + "rewards_train/rejected": -5.670340061187744, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -19.433176040649414, + "logps_train/ref_chosen": -6.4375, + "logps_train/ref_rejected": -1.59375, + "logps_train/rejected": -21.794193267822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2995675802230835, + "rewards_train/margins": 0.7204767465591431, + "rewards_train/rejected": -2.0200443267822266, + "step": 2321 + }, + { + "epoch": 0.65, + "logps_train/chosen": -193.39707946777344, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -165.42941284179688, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.589707851409912, + "rewards_train/margins": -1.1967663764953613, + "rewards_train/rejected": -5.392941474914551, + "step": 2321 + }, + { + "epoch": 0.65, + "learning_rate": 2.7591493408292255e-08, + "loss": 0.5649, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -144.32369995117188, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -149.26708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4323699474334717, + "rewards_train/margins": 3.09433913230896, + "rewards_train/rejected": -6.526709079742432, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -12.857183456420898, + "logps_train/ref_chosen": -7.53125, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -78.91195678710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5325933694839478, + "rewards_train/margins": 5.80235230922699, + "rewards_train/rejected": -6.3349456787109375, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -0.1376364827156067, + "logps_train/ref_chosen": -0.255859375, + "logps_train/ref_rejected": -0.255859375, + "logps_train/rejected": -0.14007145166397095, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.011822289787232876, + "rewards_train/margins": 0.0002434970811009407, + "rewards_train/rejected": 0.011578792706131935, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -186.76773071289062, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -232.95736694335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.076773166656494, + "rewards_train/margins": 7.468964099884033, + "rewards_train/rejected": -10.545737266540527, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -25.70252799987793, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -5.25, + "logps_train/rejected": -25.722623825073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.045252799987793, + "rewards_train/margins": 0.0020096302032470703, + "rewards_train/rejected": -2.04726243019104, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -16.71990394592285, + "logps_train/ref_chosen": -0.2177734375, + "logps_train/ref_rejected": -0.2177734375, + "logps_train/rejected": -16.11756706237793, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6502131223678589, + "rewards_train/margins": -0.0602337121963501, + "rewards_train/rejected": -1.5899794101715088, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -60.686500549316406, + "logps_train/ref_chosen": -56.25, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -145.0306396484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4436500668525696, + "rewards_train/margins": 3.5594140887260437, + "rewards_train/rejected": -4.003064155578613, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -97.6286849975586, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -149.40768432617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1128685474395752, + "rewards_train/margins": 2.9279000759124756, + "rewards_train/rejected": -4.040768623352051, + "step": 2322 + }, + { + "epoch": 0.65, + "logps_train/chosen": -83.09054565429688, + "logps_train/ref_chosen": -33.0, + "logps_train/ref_rejected": -2.3125, + "logps_train/rejected": -34.28886795043945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.009054660797119, + "rewards_train/margins": -1.811417818069458, + "rewards_train/rejected": -3.197636842727661, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -93.25894165039062, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -134.62461853027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3258942365646362, + "rewards_train/margins": 3.7365676164627075, + "rewards_train/rejected": -5.062461853027344, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -131.0835418701172, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -173.26351928710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1583542823791504, + "rewards_train/margins": 6.16799783706665, + "rewards_train/rejected": -8.3263521194458, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -3.150763988494873, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -1.6782450675964355, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1205451488494873, + "rewards_train/margins": -0.21678314357995987, + "rewards_train/rejected": 0.09623799473047256, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -26.618484497070312, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -24.375, + "logps_train/rejected": -42.20988845825195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0368484258651733, + "rewards_train/margins": 0.7466404438018799, + "rewards_train/rejected": -1.7834888696670532, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -152.30645751953125, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -234.6918487548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.230645775794983, + "rewards_train/margins": 8.038539290428162, + "rewards_train/rejected": -9.269185066223145, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -64.50543212890625, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -52.75, + "logps_train/rejected": -64.65171813964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.525543212890625, + "rewards_train/margins": -0.33537137508392334, + "rewards_train/rejected": -1.1901718378067017, + "step": 2323 + }, + { + "epoch": 0.65, + "logps_train/chosen": -21.873947143554688, + "logps_train/ref_chosen": -8.375, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -24.59394073486328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3498947620391846, + "rewards_train/margins": -0.20925068855285645, + "rewards_train/rejected": -1.1406440734863281, + "step": 2323 + }, + { + "epoch": 0.65, + "learning_rate": 2.697773061744435e-08, + "loss": 0.4434, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -27.50705909729004, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -42.5, + "logps_train/rejected": -49.31222152709961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7882059216499329, + "rewards_train/margins": -0.10698378086090088, + "rewards_train/rejected": -0.681222140789032, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -47.29366683959961, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -29.518795013427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5043667554855347, + "rewards_train/margins": 0.01001274585723877, + "rewards_train/rejected": -1.5143795013427734, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -78.2702407836914, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -93.71287536621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1229759231209755, + "rewards_train/margins": 2.1442636027932167, + "rewards_train/rejected": -2.021287679672241, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.507615089416504, + "logps_train/ref_chosen": -4.875, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -25.61726188659668, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8632615208625793, + "rewards_train/margins": 1.2828397154808044, + "rewards_train/rejected": -2.146101236343384, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -16.03546142578125, + "logps_train/ref_chosen": -7.6875, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -42.55218505859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.834796130657196, + "rewards_train/margins": 2.376672327518463, + "rewards_train/rejected": -3.211468458175659, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -99.94562530517578, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -195.18785095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.719562530517578, + "rewards_train/margins": 1.1992225646972656, + "rewards_train/rejected": -4.918785095214844, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -62.72761154174805, + "logps_train/ref_chosen": -60.75, + "logps_train/ref_rejected": -11.875, + "logps_train/rejected": -29.259185791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1977611631155014, + "rewards_train/margins": 1.540657415986061, + "rewards_train/rejected": -1.7384185791015625, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -21.424846649169922, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -49.75323486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.514359712600708, + "rewards_train/margins": 1.8984637260437012, + "rewards_train/rejected": -3.412823438644409, + "step": 2324 + }, + { + "epoch": 0.65, + "logps_train/chosen": -57.19860076904297, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -32.0, + "logps_train/rejected": -75.79969787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.019860029220581, + "rewards_train/margins": 2.36011004447937, + "rewards_train/rejected": -4.379970073699951, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -24.236984252929688, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -27.198040008544922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9393234252929688, + "rewards_train/margins": -0.08201944828033447, + "rewards_train/rejected": -1.8573039770126343, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -120.70185089111328, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -183.40829467773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.870185136795044, + "rewards_train/margins": 2.5706446170806885, + "rewards_train/rejected": -4.440829753875732, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -69.47816467285156, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -96.0860595703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4478164613246918, + "rewards_train/margins": 1.6107894480228424, + "rewards_train/rejected": -2.058605909347534, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -18.589702606201172, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -17.991851806640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.402720332145691, + "rewards_train/margins": -0.08166015148162842, + "rewards_train/rejected": -1.3210601806640625, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -133.3548583984375, + "logps_train/ref_chosen": -114.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -173.26419067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8854858875274658, + "rewards_train/margins": 2.9909331798553467, + "rewards_train/rejected": -4.8764190673828125, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -143.35275268554688, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -146.97476196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1852753162384033, + "rewards_train/margins": 0.912200927734375, + "rewards_train/rejected": -3.0974762439727783, + "step": 2325 + }, + { + "epoch": 0.65, + "logps_train/chosen": -31.23975372314453, + "logps_train/ref_chosen": -14.1875, + "logps_train/ref_rejected": -18.25, + "logps_train/rejected": -56.246803283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7052253484725952, + "rewards_train/margins": 2.094455122947693, + "rewards_train/rejected": -3.799680471420288, + "step": 2325 + }, + { + "epoch": 0.65, + "learning_rate": 2.637077795084408e-08, + "loss": 0.2993, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -45.41502380371094, + "logps_train/ref_chosen": -26.25, + "logps_train/ref_rejected": -2.96875, + "logps_train/rejected": -39.499595642089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9165023565292358, + "rewards_train/margins": 1.7365821599960327, + "rewards_train/rejected": -3.6530845165252686, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -14.422090530395508, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -69.81358337402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0984591245651245, + "rewards_train/margins": 2.9953991174697876, + "rewards_train/rejected": -4.093858242034912, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -17.464818954467773, + "logps_train/ref_chosen": -7.8125, + "logps_train/ref_rejected": -0.953125, + "logps_train/rejected": -34.0161018371582, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9652318954467773, + "rewards_train/margins": 2.3410658836364746, + "rewards_train/rejected": -3.306297779083252, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -139.77561950683594, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -144.1470184326172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.777561902999878, + "rewards_train/margins": 0.9871399402618408, + "rewards_train/rejected": -4.764701843261719, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -16.68667221069336, + "logps_train/ref_chosen": -2.578125, + "logps_train/ref_rejected": -1.328125, + "logps_train/rejected": -3.870664119720459, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.410854697227478, + "rewards_train/margins": -1.1566007733345032, + "rewards_train/rejected": -0.25425392389297485, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -23.235462188720703, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -33.34900665283203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5110462307929993, + "rewards_train/margins": 1.5613545775413513, + "rewards_train/rejected": -2.0724008083343506, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -244.0872039794922, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -247.0974578857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.808720588684082, + "rewards_train/margins": 0.9010257720947266, + "rewards_train/rejected": -11.709746360778809, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -124.79974365234375, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -128.29005432128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.329974412918091, + "rewards_train/margins": 1.699031114578247, + "rewards_train/rejected": -4.029005527496338, + "step": 2326 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.144071578979492, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -19.42359733581543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.19184283912181854, + "rewards_train/margins": 1.0279525965452194, + "rewards_train/rejected": -0.8361097574234009, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -149.1931610107422, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -180.83380126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.819316387176514, + "rewards_train/margins": 1.3140640258789062, + "rewards_train/rejected": -6.13338041305542, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -159.979248046875, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -215.792724609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0979249477386475, + "rewards_train/margins": 5.2813475131988525, + "rewards_train/rejected": -8.3792724609375, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -72.14160919189453, + "logps_train/ref_chosen": -58.25, + "logps_train/ref_rejected": -76.0, + "logps_train/rejected": -164.77113342285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3891609907150269, + "rewards_train/margins": 7.487952351570129, + "rewards_train/rejected": -8.877113342285156, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -151.4069061279297, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -147.41445922851562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.140690803527832, + "rewards_train/margins": -1.0992445945739746, + "rewards_train/rejected": -4.041446208953857, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -248.10598754882812, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -306.6878662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.910598754882812, + "rewards_train/margins": 0.8581876754760742, + "rewards_train/rejected": -10.768786430358887, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -26.930334091186523, + "logps_train/ref_chosen": -15.125, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -51.502655029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1805334091186523, + "rewards_train/margins": 1.744732141494751, + "rewards_train/rejected": -2.9252655506134033, + "step": 2327 + }, + { + "epoch": 0.65, + "logps_train/chosen": -27.05812644958496, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -57.135746002197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.168312668800354, + "rewards_train/margins": 2.1702619791030884, + "rewards_train/rejected": -3.3385746479034424, + "step": 2327 + }, + { + "epoch": 0.65, + "learning_rate": 2.5770639656516712e-08, + "loss": 0.3318, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -162.68930053710938, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -89.5, + "logps_train/rejected": -157.97860717773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.9189300537109375, + "rewards_train/margins": -1.0710692405700684, + "rewards_train/rejected": -6.847860813140869, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -154.0283966064453, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -219.77938842773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.052839756011963, + "rewards_train/margins": 6.525099277496338, + "rewards_train/rejected": -13.5779390335083, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -15.208111763000488, + "logps_train/ref_chosen": -14.625, + "logps_train/ref_rejected": -0.59765625, + "logps_train/rejected": -9.270888328552246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05831117555499077, + "rewards_train/margins": 0.8090120442211628, + "rewards_train/rejected": -0.8673232197761536, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -99.06773376464844, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -188.5061798095703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1567734479904175, + "rewards_train/margins": 5.493844628334045, + "rewards_train/rejected": -6.650618076324463, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -4.293414115905762, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -105.61244201660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.19184140861034393, + "rewards_train/margins": 2.5194029361009598, + "rewards_train/rejected": -2.7112443447113037, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -32.16126251220703, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -146.03054809570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.697376251220703, + "rewards_train/margins": 0.005678653717041016, + "rewards_train/rejected": -2.703054904937744, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -35.80216979980469, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -122.91659545898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7677170038223267, + "rewards_train/margins": 3.7739428281784058, + "rewards_train/rejected": -5.541659832000732, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -44.32942199707031, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -50.941619873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.032942295074463, + "rewards_train/margins": 2.426844596862793, + "rewards_train/rejected": -4.459786891937256, + "step": 2328 + }, + { + "epoch": 0.65, + "logps_train/chosen": -258.6784362792969, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -261.7149658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.767843723297119, + "rewards_train/margins": 1.2036528587341309, + "rewards_train/rejected": -8.97149658203125, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -141.567138671875, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -55.29824447631836, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.5067138671875, + "rewards_train/margins": -0.8206391334533691, + "rewards_train/rejected": -4.686074733734131, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -26.219097137451172, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -55.426300048828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.665659785270691, + "rewards_train/margins": 2.8582202196121216, + "rewards_train/rejected": -4.5238800048828125, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.513955116271973, + "logps_train/ref_chosen": -9.875, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -20.05829429626465, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3638955056667328, + "rewards_train/margins": 0.7919339239597321, + "rewards_train/rejected": -1.1558294296264648, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -25.14691162109375, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -85.14999389648438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6709412336349487, + "rewards_train/margins": -1.4059418439865112, + "rewards_train/rejected": -0.2649993896484375, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -194.11959838867188, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -154.8556671142578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.811959743499756, + "rewards_train/margins": -1.526392936706543, + "rewards_train/rejected": -5.285566806793213, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -163.13198852539062, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -193.77085876464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7631988525390625, + "rewards_train/margins": 4.563887596130371, + "rewards_train/rejected": -8.327086448669434, + "step": 2329 + }, + { + "epoch": 0.65, + "logps_train/chosen": -33.331912994384766, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -5.96875, + "logps_train/rejected": -13.53321361541748, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6706913113594055, + "rewards_train/margins": 0.08575505018234253, + "rewards_train/rejected": -0.756446361541748, + "step": 2329 + }, + { + "epoch": 0.65, + "learning_rate": 2.5177319934793995e-08, + "loss": 0.532, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -31.446195602416992, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -32.461002349853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8321195840835571, + "rewards_train/margins": 1.698355793952942, + "rewards_train/rejected": -2.530475378036499, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -196.10337829589844, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -249.80738830566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.860337734222412, + "rewards_train/margins": 1.5204014778137207, + "rewards_train/rejected": -8.380739212036133, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -79.45635986328125, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -66.44642639160156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0456360578536987, + "rewards_train/margins": -0.07599341869354248, + "rewards_train/rejected": -0.9696426391601562, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -60.4830436706543, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -31.62748146057129, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6983043551445007, + "rewards_train/margins": 1.4081938862800598, + "rewards_train/rejected": -2.1064982414245605, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -81.83616638183594, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -97.87599182128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.208616733551025, + "rewards_train/margins": 2.716482639312744, + "rewards_train/rejected": -6.9250993728637695, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -32.6451416015625, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -26.171295166015625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4895142316818237, + "rewards_train/margins": -0.02863466739654541, + "rewards_train/rejected": -1.4608795642852783, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -71.66175079345703, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -126.86576843261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1161750555038452, + "rewards_train/margins": 3.7704020738601685, + "rewards_train/rejected": -4.886577129364014, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -200.90675354003906, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -232.43743896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.590675354003906, + "rewards_train/margins": 3.7530689239501953, + "rewards_train/rejected": -9.343744277954102, + "step": 2330 + }, + { + "epoch": 0.65, + "logps_train/chosen": -144.24374389648438, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -143.34490966796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.524374485015869, + "rewards_train/margins": -0.08988332748413086, + "rewards_train/rejected": -6.434491157531738, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -69.15152740478516, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -180.9292449951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8901528120040894, + "rewards_train/margins": 1.202771782875061, + "rewards_train/rejected": -3.0929245948791504, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -83.38099670410156, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -134.17376708984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5380997061729431, + "rewards_train/margins": 0.2792770266532898, + "rewards_train/rejected": -0.8173767328262329, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -25.772769927978516, + "logps_train/ref_chosen": -7.28125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -30.23290252685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8491519689559937, + "rewards_train/margins": 0.4803882837295532, + "rewards_train/rejected": -2.329540252685547, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -145.64767456054688, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -218.51719665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.164767742156982, + "rewards_train/margins": 5.736952304840088, + "rewards_train/rejected": -9.90172004699707, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -96.02824401855469, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -239.0, + "logps_train/rejected": -288.4353942871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8528244495391846, + "rewards_train/margins": 3.090715169906616, + "rewards_train/rejected": -4.943539619445801, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -78.64797973632812, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -116.96015167236328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1147979497909546, + "rewards_train/margins": 0.5312172174453735, + "rewards_train/rejected": -1.6460151672363281, + "step": 2331 + }, + { + "epoch": 0.65, + "logps_train/chosen": -66.08836364746094, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -137.33299255371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4588364362716675, + "rewards_train/margins": 1.424462914466858, + "rewards_train/rejected": -2.8832993507385254, + "step": 2331 + }, + { + "epoch": 0.65, + "learning_rate": 2.459082293828485e-08, + "loss": 0.307, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -31.24160385131836, + "logps_train/ref_chosen": -31.5, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -50.675880432128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.025839615613222122, + "rewards_train/margins": 2.9684277065098286, + "rewards_train/rejected": -2.9425880908966064, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -0.6466708183288574, + "logps_train/ref_chosen": -0.181640625, + "logps_train/ref_rejected": -0.181640625, + "logps_train/rejected": -0.6451920866966248, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.04650301858782768, + "rewards_train/margins": -0.00014787167310714722, + "rewards_train/rejected": -0.046355146914720535, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -280.5828857421875, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -217.032470703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.858288764953613, + "rewards_train/margins": -0.7550411224365234, + "rewards_train/rejected": -10.10324764251709, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -246.05941772460938, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -228.726806640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.705942153930664, + "rewards_train/margins": -0.7332611083984375, + "rewards_train/rejected": -9.972681045532227, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -127.03353881835938, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -106.05308532714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7033538818359375, + "rewards_train/margins": -0.79804527759552, + "rewards_train/rejected": -1.9053086042404175, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -87.1290512084961, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -133.66232299804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6129051446914673, + "rewards_train/margins": 2.9033273458480835, + "rewards_train/rejected": -4.516232490539551, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -18.033885955810547, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -4.21875, + "logps_train/rejected": -25.081260681152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8783885836601257, + "rewards_train/margins": 1.2078624367713928, + "rewards_train/rejected": -2.0862510204315186, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -18.0168399810791, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -91.01542663574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8704339861869812, + "rewards_train/margins": 4.7311089634895325, + "rewards_train/rejected": -5.601542949676514, + "step": 2332 + }, + { + "epoch": 0.65, + "logps_train/chosen": -21.32553482055664, + "logps_train/ref_chosen": -8.5, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -14.174615859985352, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2825535535812378, + "rewards_train/margins": -1.2150919660925865, + "rewards_train/rejected": -0.06746158748865128, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -21.56902313232422, + "logps_train/ref_chosen": -11.6875, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -11.019454002380371, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9881523251533508, + "rewards_train/margins": -0.31433188915252686, + "rewards_train/rejected": -0.673820436000824, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -226.8350830078125, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -235.7182159423828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.383508682250977, + "rewards_train/margins": -0.41168689727783203, + "rewards_train/rejected": -7.9718217849731445, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -71.66600799560547, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -137.6981658935547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8166007995605469, + "rewards_train/margins": 2.9532158374786377, + "rewards_train/rejected": -3.7698166370391846, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -83.68609619140625, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -108.26287841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1436097621917725, + "rewards_train/margins": 2.5826780796051025, + "rewards_train/rejected": -5.726287841796875, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -42.75308609008789, + "logps_train/ref_chosen": -26.375, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -32.83802032470703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6378086805343628, + "rewards_train/margins": 1.3756808042526245, + "rewards_train/rejected": -3.0134894847869873, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -15.391928672790527, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -1.34375, + "logps_train/rejected": -8.904547691345215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2095054388046265, + "rewards_train/margins": -0.45342564582824707, + "rewards_train/rejected": -0.7560797929763794, + "step": 2333 + }, + { + "epoch": 0.65, + "logps_train/chosen": -209.24163818359375, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -260.0, + "logps_train/rejected": -294.8639831542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.024163842201233, + "rewards_train/margins": 2.462234616279602, + "rewards_train/rejected": -3.486398458480835, + "step": 2333 + }, + { + "epoch": 0.65, + "learning_rate": 2.40111527718464e-08, + "loss": 0.5711, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -24.913978576660156, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -59.58695983886719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6288978457450867, + "rewards_train/margins": 1.554798185825348, + "rewards_train/rejected": -2.1836960315704346, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -108.90802001953125, + "logps_train/ref_chosen": -85.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -170.99319458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.340801954269409, + "rewards_train/margins": 4.358517408370972, + "rewards_train/rejected": -6.699319362640381, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -51.932823181152344, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -55.365325927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8182823657989502, + "rewards_train/margins": 1.230750322341919, + "rewards_train/rejected": -3.049032688140869, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -18.011337280273438, + "logps_train/ref_chosen": -2.890625, + "logps_train/ref_rejected": -3.859375, + "logps_train/rejected": -49.932167053222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5120712518692017, + "rewards_train/margins": 3.0952080488204956, + "rewards_train/rejected": -4.607279300689697, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -40.62099838256836, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -33.42306137084961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.787099838256836, + "rewards_train/margins": -0.241668701171875, + "rewards_train/rejected": -2.545431137084961, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -124.3865966796875, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -123.41326904296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.68865966796875, + "rewards_train/margins": -0.09733277559280396, + "rewards_train/rejected": -0.591326892375946, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -15.271925926208496, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -5.21875, + "logps_train/rejected": -23.86960792541504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.34594258666038513, + "rewards_train/margins": 1.5191432535648346, + "rewards_train/rejected": -1.8650858402252197, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -208.01718139648438, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -169.00534057617188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2017180919647217, + "rewards_train/margins": -0.10118389129638672, + "rewards_train/rejected": -2.100534200668335, + "step": 2334 + }, + { + "epoch": 0.65, + "logps_train/chosen": -19.300582885742188, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -40.31739807128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4925583600997925, + "rewards_train/margins": 1.5766814947128296, + "rewards_train/rejected": -3.069239854812622, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -44.57359313964844, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -4.8125, + "logps_train/rejected": -30.54190444946289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.9229843616485596, + "rewards_train/margins": -1.350043773651123, + "rewards_train/rejected": -2.5729405879974365, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -48.892784118652344, + "logps_train/ref_chosen": -38.75, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -49.1702995300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0142784118652344, + "rewards_train/margins": 1.7902514934539795, + "rewards_train/rejected": -2.804529905319214, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -207.24154663085938, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -234.2600860595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.824154853820801, + "rewards_train/margins": 4.001853942871094, + "rewards_train/rejected": -10.826008796691895, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -3.226290464401245, + "logps_train/ref_chosen": -0.1884765625, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -13.479368209838867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3037813901901245, + "rewards_train/margins": 0.5504054427146912, + "rewards_train/rejected": -0.8541868329048157, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -93.92056274414062, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -99.54151916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0420563220977783, + "rewards_train/margins": 0.11209559440612793, + "rewards_train/rejected": -1.1541519165039062, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -153.2186279296875, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -245.09161376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.371862888336182, + "rewards_train/margins": 5.137299060821533, + "rewards_train/rejected": -10.509161949157715, + "step": 2335 + }, + { + "epoch": 0.65, + "logps_train/chosen": -66.52821350097656, + "logps_train/ref_chosen": -41.0, + "logps_train/ref_rejected": -56.25, + "logps_train/rejected": -128.91909790039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.552821397781372, + "rewards_train/margins": 4.714088678359985, + "rewards_train/rejected": -7.266910076141357, + "step": 2335 + }, + { + "epoch": 0.65, + "learning_rate": 2.343831349255532e-08, + "loss": 0.3789, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -74.5013656616211, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -48.5, + "logps_train/rejected": -90.5478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6751365661621094, + "rewards_train/margins": 2.529648780822754, + "rewards_train/rejected": -4.204785346984863, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -135.7627410888672, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -171.7718963623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.926274061203003, + "rewards_train/margins": 5.600916147232056, + "rewards_train/rejected": -8.527190208435059, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -130.42942810058594, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -121.05947875976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.992942810058594, + "rewards_train/margins": -0.7869949340820312, + "rewards_train/rejected": -4.2059478759765625, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -127.38961791992188, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -188.80252075195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6889617443084717, + "rewards_train/margins": 6.391290903091431, + "rewards_train/rejected": -9.080252647399902, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -34.381629943847656, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -39.814640045166016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1444129943847656, + "rewards_train/margins": 0.5245511531829834, + "rewards_train/rejected": -2.668964147567749, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -7.457362651824951, + "logps_train/ref_chosen": -6.0625, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -60.77235794067383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13948626816272736, + "rewards_train/margins": 2.7752495259046555, + "rewards_train/rejected": -2.914735794067383, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -134.94834899902344, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -153.2139434814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9948348999023438, + "rewards_train/margins": 0.6765594482421875, + "rewards_train/rejected": -4.671394348144531, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -175.52163696289062, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -229.33340454101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.55216383934021, + "rewards_train/margins": 1.2811765670776367, + "rewards_train/rejected": -3.8333404064178467, + "step": 2336 + }, + { + "epoch": 0.65, + "logps_train/chosen": -19.088497161865234, + "logps_train/ref_chosen": -5.71875, + "logps_train/ref_rejected": -15.4375, + "logps_train/rejected": -31.94605827331543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3369747400283813, + "rewards_train/margins": 0.31388115882873535, + "rewards_train/rejected": -1.6508558988571167, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -135.36294555664062, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -194.87689208984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8862946033477783, + "rewards_train/margins": 6.3013951778411865, + "rewards_train/rejected": -8.187689781188965, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -14.644381523132324, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -39.85362243652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4144381582736969, + "rewards_train/margins": 2.0084241330623627, + "rewards_train/rejected": -2.4228622913360596, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.125426292419434, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -20.529977798461914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7156676650047302, + "rewards_train/margins": 0.5310801863670349, + "rewards_train/rejected": -1.2467478513717651, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -159.8125, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -160.97906494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.78125, + "rewards_train/margins": 1.0666565895080566, + "rewards_train/rejected": -5.847906589508057, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -4.9403181076049805, + "logps_train/ref_chosen": -4.53125, + "logps_train/ref_rejected": -12.25, + "logps_train/rejected": -32.46031951904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.040906812995672226, + "rewards_train/margins": 1.980125281959772, + "rewards_train/rejected": -2.0210320949554443, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -237.83151245117188, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -183.0, + "logps_train/rejected": -280.1612854003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.283151626586914, + "rewards_train/margins": 0.43297672271728516, + "rewards_train/rejected": -9.7161283493042, + "step": 2337 + }, + { + "epoch": 0.65, + "logps_train/chosen": -34.11183166503906, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -51.716331481933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3611831665039062, + "rewards_train/margins": 1.4729499816894531, + "rewards_train/rejected": -3.8341331481933594, + "step": 2337 + }, + { + "epoch": 0.65, + "learning_rate": 2.2872309109679077e-08, + "loss": 0.2935, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -205.86563110351562, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -215.37136840820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.236563682556152, + "rewards_train/margins": 1.6005735397338867, + "rewards_train/rejected": -11.837137222290039, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -10.599959373474121, + "logps_train/ref_chosen": -2.234375, + "logps_train/ref_rejected": -11.0, + "logps_train/rejected": -13.171903610229492, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.83655846118927, + "rewards_train/margins": -0.6193680912256241, + "rewards_train/rejected": -0.21719036996364594, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -86.91448974609375, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -62.545738220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2414491176605225, + "rewards_train/margins": 1.9631249904632568, + "rewards_train/rejected": -4.204574108123779, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -122.5284652709961, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -96.00606536865234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4528465270996094, + "rewards_train/margins": -0.702239990234375, + "rewards_train/rejected": -0.7506065368652344, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -13.073091506958008, + "logps_train/ref_chosen": -4.28125, + "logps_train/ref_rejected": -3.078125, + "logps_train/rejected": -7.725618362426758, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8791841864585876, + "rewards_train/margins": -0.41443485021591187, + "rewards_train/rejected": -0.4647493362426758, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -0.7423474788665771, + "logps_train/ref_chosen": -0.52734375, + "logps_train/ref_rejected": -1.03125, + "logps_train/rejected": -4.316951274871826, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.021500373259186745, + "rewards_train/margins": 0.3070697542279959, + "rewards_train/rejected": -0.3285701274871826, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -70.12483978271484, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -118.70378112792969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.062483977526426315, + "rewards_train/margins": 2.4578941352665424, + "rewards_train/rejected": -2.5203781127929688, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -39.17202377319336, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -51.788021087646484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4047024250030518, + "rewards_train/margins": 0.19284963607788086, + "rewards_train/rejected": -3.5975520610809326, + "step": 2338 + }, + { + "epoch": 0.65, + "logps_train/chosen": -7.955750942230225, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -6.25, + "logps_train/rejected": -10.367132186889648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.010674905963242054, + "rewards_train/margins": 0.4223881186917424, + "rewards_train/rejected": -0.41171321272850037, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -109.81377410888672, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -119.7747802734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0313775539398193, + "rewards_train/margins": -0.30389952659606934, + "rewards_train/rejected": -1.72747802734375, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -5.787237167358398, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -2.640625, + "logps_train/rejected": -8.563279151916504, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.31778621673583984, + "rewards_train/margins": 0.2744792103767395, + "rewards_train/rejected": -0.5922654271125793, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -72.9705581665039, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -71.92344665527344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5470558404922485, + "rewards_train/margins": 0.3952888250350952, + "rewards_train/rejected": -0.9423446655273438, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -17.736717224121094, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -25.730712890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2111717313528061, + "rewards_train/margins": 1.086899533867836, + "rewards_train/rejected": -1.298071265220642, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -36.64988327026367, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -42.50940704345703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2399883270263672, + "rewards_train/margins": 1.298452377319336, + "rewards_train/rejected": -2.538440704345703, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -148.81271362304688, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -256.48419189453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8812713623046875, + "rewards_train/margins": 10.0171480178833, + "rewards_train/rejected": -12.898419380187988, + "step": 2339 + }, + { + "epoch": 0.65, + "logps_train/chosen": -68.50396728515625, + "logps_train/ref_chosen": -57.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -137.16748046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.100396752357483, + "rewards_train/margins": 1.7163513898849487, + "rewards_train/rejected": -2.8167481422424316, + "step": 2339 + }, + { + "epoch": 0.65, + "learning_rate": 2.231314358464842e-08, + "loss": 0.4852, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -457.6280517578125, + "logps_train/ref_chosen": -362.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -133.2750244140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.56280517578125, + "rewards_train/margins": -6.385302782058716, + "rewards_train/rejected": -3.177502393722534, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -197.5269012451172, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -181.0, + "logps_train/rejected": -287.711181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.052690029144287, + "rewards_train/margins": 4.618428707122803, + "rewards_train/rejected": -10.67111873626709, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -137.10006713867188, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -138.05422973632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.560006856918335, + "rewards_train/margins": 2.695416212081909, + "rewards_train/rejected": -5.255423069000244, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -213.15365600585938, + "logps_train/ref_chosen": -148.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -214.48146057128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.5153656005859375, + "rewards_train/margins": -0.26721954345703125, + "rewards_train/rejected": -6.248146057128906, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -160.23020935058594, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -229.0, + "logps_train/rejected": -259.0356750488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2769790589809418, + "rewards_train/margins": 3.2805465161800385, + "rewards_train/rejected": -3.0035674571990967, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -268.2487487792969, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -278.54364013671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.524874687194824, + "rewards_train/margins": -0.0705108642578125, + "rewards_train/rejected": -11.454363822937012, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -9.047390937805176, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -124.26858520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.017239093780517578, + "rewards_train/margins": 3.859619379043579, + "rewards_train/rejected": -3.8768584728240967, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -109.94273376464844, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -179.87159729003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7942733764648438, + "rewards_train/margins": 3.542886257171631, + "rewards_train/rejected": -6.337159633636475, + "step": 2340 + }, + { + "epoch": 0.65, + "logps_train/chosen": -135.48118591308594, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -106.68740844726562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.398118734359741, + "rewards_train/margins": -0.8293778896331787, + "rewards_train/rejected": -1.5687408447265625, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -44.531593322753906, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -44.459877014160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.959409475326538, + "rewards_train/margins": 1.055328130722046, + "rewards_train/rejected": -4.014737606048584, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -90.6843490600586, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -98.88795471191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1684348583221436, + "rewards_train/margins": 0.3703606128692627, + "rewards_train/rejected": -2.5387954711914062, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -112.9615478515625, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -100.53473663330078, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3461549282073975, + "rewards_train/margins": -0.7926812171936035, + "rewards_train/rejected": -1.553473711013794, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -91.47061157226562, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -92.58170318603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3470611572265625, + "rewards_train/margins": 0.1111091673374176, + "rewards_train/rejected": -0.4581703245639801, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -74.10970306396484, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -98.16669464111328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.160970449447632, + "rewards_train/margins": -1.644300937652588, + "rewards_train/rejected": -1.516669511795044, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -106.77687072753906, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -115.203369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3776872158050537, + "rewards_train/margins": 0.9426496028900146, + "rewards_train/rejected": -4.320336818695068, + "step": 2341 + }, + { + "epoch": 0.65, + "logps_train/chosen": -45.11502456665039, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -49.308773040771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4240024089813232, + "rewards_train/margins": 2.09125018119812, + "rewards_train/rejected": -4.515252590179443, + "step": 2341 + }, + { + "epoch": 0.65, + "learning_rate": 2.1760820831029035e-08, + "loss": 0.8906, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -19.120615005493164, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -48.064857482910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27456149458885193, + "rewards_train/margins": 2.669424206018448, + "rewards_train/rejected": -2.9439857006073, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -12.755367279052734, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -2.0, + "logps_train/rejected": -11.101140022277832, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0932132750749588, + "rewards_train/margins": 1.003327265381813, + "rewards_train/rejected": -0.9101139903068542, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -112.79489135742188, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -165.70870971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2294892072677612, + "rewards_train/margins": 2.441381812095642, + "rewards_train/rejected": -3.6708710193634033, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -238.01898193359375, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -210.37535095214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.901898384094238, + "rewards_train/margins": 3.335637092590332, + "rewards_train/rejected": -9.23753547668457, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -20.13167381286621, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -50.12907409667969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3881673812866211, + "rewards_train/margins": 1.5997400283813477, + "rewards_train/rejected": -1.9879074096679688, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -15.858814239501953, + "logps_train/ref_chosen": -13.9375, + "logps_train/ref_rejected": -5.65625, + "logps_train/rejected": -20.49460220336914, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1921314299106598, + "rewards_train/margins": 1.2917037904262543, + "rewards_train/rejected": -1.483835220336914, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -99.08349609375, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -169.83627319335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35834962129592896, + "rewards_train/margins": 2.02527779340744, + "rewards_train/rejected": -2.383627414703369, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -59.11213684082031, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -65.28080749511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.436213731765747, + "rewards_train/margins": 1.5918669700622559, + "rewards_train/rejected": -3.028080701828003, + "step": 2342 + }, + { + "epoch": 0.65, + "logps_train/chosen": -115.50967407226562, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -200.3168487548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8009674549102783, + "rewards_train/margins": 4.830717325210571, + "rewards_train/rejected": -6.63168478012085, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -12.182879447937012, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -23.562843322753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9432879686355591, + "rewards_train/margins": 0.5442464351654053, + "rewards_train/rejected": -1.4875344038009644, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -31.291706085205078, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -6.65625, + "logps_train/rejected": -34.846221923828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.92292058467865, + "rewards_train/margins": 0.8960765600204468, + "rewards_train/rejected": -2.8189971446990967, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -97.58146667480469, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -179.2933807373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7581467032432556, + "rewards_train/margins": 5.1711912751197815, + "rewards_train/rejected": -5.929337978363037, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -18.71601676940918, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -25.375, + "logps_train/rejected": -46.29739761352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5716017484664917, + "rewards_train/margins": 0.520638108253479, + "rewards_train/rejected": -2.0922398567199707, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -14.150300979614258, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -6.90625, + "logps_train/rejected": -23.154203414916992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1165926456451416, + "rewards_train/margins": 0.5082026720046997, + "rewards_train/rejected": -1.6247953176498413, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -23.390470504760742, + "logps_train/ref_chosen": -17.0, + "logps_train/ref_rejected": -4.03125, + "logps_train/rejected": -36.675960540771484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6390470862388611, + "rewards_train/margins": 2.6254239678382874, + "rewards_train/rejected": -3.2644710540771484, + "step": 2343 + }, + { + "epoch": 0.65, + "logps_train/chosen": -28.15469741821289, + "logps_train/ref_chosen": -26.5, + "logps_train/ref_rejected": -7.84375, + "logps_train/rejected": -18.92897605895996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16546975076198578, + "rewards_train/margins": 0.9430529028177261, + "rewards_train/rejected": -1.108522653579712, + "step": 2343 + }, + { + "epoch": 0.66, + "learning_rate": 2.121534471449482e-08, + "loss": 0.2115, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -55.36970520019531, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -41.09362030029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.336970567703247, + "rewards_train/margins": 1.734891414642334, + "rewards_train/rejected": -3.071861982345581, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -104.58200073242188, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -114.1767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1082000732421875, + "rewards_train/margins": 0.5594756603240967, + "rewards_train/rejected": -3.667675733566284, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -72.42698669433594, + "logps_train/ref_chosen": -45.75, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -140.71231079101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.667698621749878, + "rewards_train/margins": 5.1535327434539795, + "rewards_train/rejected": -7.821231365203857, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -41.05371856689453, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -15.625, + "logps_train/rejected": -45.418739318847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7866218090057373, + "rewards_train/margins": 0.19275212287902832, + "rewards_train/rejected": -2.9793739318847656, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -79.47334289550781, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -163.33233642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4973343014717102, + "rewards_train/margins": 5.835899531841278, + "rewards_train/rejected": -6.333233833312988, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -43.242225646972656, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -45.99199295043945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0117225646972656, + "rewards_train/margins": 1.203101634979248, + "rewards_train/rejected": -4.214824199676514, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -5.8362627029418945, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -21.23697280883789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07737626880407333, + "rewards_train/margins": 1.1650709882378578, + "rewards_train/rejected": -1.2424472570419312, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -179.59298706054688, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -202.9507598876953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.059298992156982, + "rewards_train/margins": -0.06422281265258789, + "rewards_train/rejected": -6.9950761795043945, + "step": 2344 + }, + { + "epoch": 0.66, + "logps_train/chosen": -25.066062927246094, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -3.921875, + "logps_train/rejected": -22.30805206298828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9534813165664673, + "rewards_train/margins": -0.11486363410949707, + "rewards_train/rejected": -1.8386176824569702, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -229.7647705078125, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -143.0, + "logps_train/rejected": -244.13662719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.47647762298584, + "rewards_train/margins": 1.6371850967407227, + "rewards_train/rejected": -10.113662719726562, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -224.268310546875, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -187.12466430664062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.826830863952637, + "rewards_train/margins": -1.014364242553711, + "rewards_train/rejected": -7.812466621398926, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -82.75720977783203, + "logps_train/ref_chosen": -48.75, + "logps_train/ref_rejected": -57.75, + "logps_train/rejected": -102.63944244384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4007210731506348, + "rewards_train/margins": 1.0882234573364258, + "rewards_train/rejected": -4.4889445304870605, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -149.69610595703125, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -204.020751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.769610643386841, + "rewards_train/margins": 5.0824644565582275, + "rewards_train/rejected": -7.852075099945068, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -38.81795883178711, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -47.03547286987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2067959308624268, + "rewards_train/margins": 1.2655014991760254, + "rewards_train/rejected": -3.472297430038452, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -222.31884765625, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -182.968017578125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.731884956359863, + "rewards_train/margins": -1.6850829124450684, + "rewards_train/rejected": -6.046802043914795, + "step": 2345 + }, + { + "epoch": 0.66, + "logps_train/chosen": -35.49176788330078, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -50.96910858154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.399176836013794, + "rewards_train/margins": 0.2102341651916504, + "rewards_train/rejected": -2.6094110012054443, + "step": 2345 + }, + { + "epoch": 0.66, + "learning_rate": 2.0676719052799994e-08, + "loss": 0.4833, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -25.999658584594727, + "logps_train/ref_chosen": -13.3125, + "logps_train/ref_rejected": -13.375, + "logps_train/rejected": -31.606502532958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2687158584594727, + "rewards_train/margins": 0.5544344186782837, + "rewards_train/rejected": -1.8231502771377563, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -122.27391815185547, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -212.972900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6773918271064758, + "rewards_train/margins": 6.819898307323456, + "rewards_train/rejected": -7.497290134429932, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -175.16986083984375, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -199.99624633789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.616986036300659, + "rewards_train/margins": 4.382638692855835, + "rewards_train/rejected": -6.999624729156494, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -20.778650283813477, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -45.26716613769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7903650403022766, + "rewards_train/margins": 2.655101716518402, + "rewards_train/rejected": -3.4454667568206787, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -143.2351531982422, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -147.4228973388672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8235153555870056, + "rewards_train/margins": 0.01877439022064209, + "rewards_train/rejected": -0.8422897458076477, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -146.35772705078125, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -200.76278686523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.135772705078125, + "rewards_train/margins": 4.740506172180176, + "rewards_train/rejected": -9.8762788772583, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -107.9656982421875, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -43.75, + "logps_train/rejected": -79.52053833007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8965698480606079, + "rewards_train/margins": 2.6804839372634888, + "rewards_train/rejected": -3.5770537853240967, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -90.5706787109375, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -130.98663330078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1070679426193237, + "rewards_train/margins": 1.9915953874588013, + "rewards_train/rejected": -3.098663330078125, + "step": 2346 + }, + { + "epoch": 0.66, + "logps_train/chosen": -127.80305480957031, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -86.0, + "logps_train/rejected": -146.0304718017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4803056716918945, + "rewards_train/margins": 1.5227417945861816, + "rewards_train/rejected": -6.003047466278076, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -95.30366516113281, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -188.48724365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.755366563796997, + "rewards_train/margins": 2.843357801437378, + "rewards_train/rejected": -6.598724365234375, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -8.21993350982666, + "logps_train/ref_chosen": -0.828125, + "logps_train/ref_rejected": -0.828125, + "logps_train/rejected": -7.50223445892334, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.739180862903595, + "rewards_train/margins": -0.07176989316940308, + "rewards_train/rejected": -0.6674109697341919, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -48.088233947753906, + "logps_train/ref_chosen": -24.75, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -59.72471237182617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3338234424591064, + "rewards_train/margins": 0.3636479377746582, + "rewards_train/rejected": -2.6974713802337646, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -132.6995849609375, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -66.5, + "logps_train/rejected": -123.97466278076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.669958591461182, + "rewards_train/margins": 0.07750797271728516, + "rewards_train/rejected": -5.747466564178467, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -137.10513305664062, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -275.8077392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5105133056640625, + "rewards_train/margins": 10.070261001586914, + "rewards_train/rejected": -14.580774307250977, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -53.40715789794922, + "logps_train/ref_chosen": -49.25, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -37.40515899658203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4157157838344574, + "rewards_train/margins": -0.3501998856663704, + "rewards_train/rejected": -0.065515898168087, + "step": 2347 + }, + { + "epoch": 0.66, + "logps_train/chosen": -68.90116119384766, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -42.424739837646484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.333866119384766, + "rewards_train/margins": -1.6976420879364014, + "rewards_train/rejected": -3.6362240314483643, + "step": 2347 + }, + { + "epoch": 0.66, + "learning_rate": 2.0144947615753138e-08, + "loss": 0.3962, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -199.03915405273438, + "logps_train/ref_chosen": -160.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -159.96054077148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9039154052734375, + "rewards_train/margins": 0.4921388626098633, + "rewards_train/rejected": -4.396054267883301, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -21.78167724609375, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -28.668039321899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.612542748451233, + "rewards_train/margins": 0.4355112314224243, + "rewards_train/rejected": -2.0480539798736572, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -134.77484130859375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -177.33096313476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9774841666221619, + "rewards_train/margins": 4.905612051486969, + "rewards_train/rejected": -5.883096218109131, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.63998031616211, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -23.416149139404297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7889980673789978, + "rewards_train/margins": 1.2416794896125793, + "rewards_train/rejected": -2.030677556991577, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -85.80343627929688, + "logps_train/ref_chosen": -67.5, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -152.05242919921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8303436040878296, + "rewards_train/margins": 6.374899506568909, + "rewards_train/rejected": -8.205243110656738, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -165.9425506591797, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -271.522216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.844255208969116, + "rewards_train/margins": 7.0079662799835205, + "rewards_train/rejected": -10.852221488952637, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -83.94921112060547, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -138.18426513671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.344921112060547, + "rewards_train/margins": 2.9735054969787598, + "rewards_train/rejected": -5.318426609039307, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -27.18276023864746, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -3.0625, + "logps_train/rejected": -27.62053871154785, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.093276023864746, + "rewards_train/margins": 1.362527847290039, + "rewards_train/rejected": -2.455803871154785, + "step": 2348 + }, + { + "epoch": 0.66, + "logps_train/chosen": -15.718984603881836, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -74.5, + "logps_train/rejected": -119.1282730102539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6343984603881836, + "rewards_train/margins": 3.8284287452697754, + "rewards_train/rejected": -4.462827205657959, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -3.747596025466919, + "logps_train/ref_chosen": -1.2578125, + "logps_train/ref_rejected": -1.0078125, + "logps_train/rejected": -25.19378662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2489783614873886, + "rewards_train/margins": 2.169619098305702, + "rewards_train/rejected": -2.418597459793091, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -60.32276916503906, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -50.30584716796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9572769403457642, + "rewards_train/margins": 0.973307728767395, + "rewards_train/rejected": -2.930584669113159, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -208.4906768798828, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -144.99021911621094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.249068260192871, + "rewards_train/margins": -2.7500462532043457, + "rewards_train/rejected": -6.499022006988525, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -107.43461608886719, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -180.0464630126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24346160888671875, + "rewards_train/margins": 7.561184883117676, + "rewards_train/rejected": -7.8046464920043945, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -39.63069152832031, + "logps_train/ref_chosen": -14.9375, + "logps_train/ref_rejected": -6.84375, + "logps_train/rejected": -29.163192749023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4693191051483154, + "rewards_train/margins": -0.23737478256225586, + "rewards_train/rejected": -2.2319443225860596, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -26.510374069213867, + "logps_train/ref_chosen": -15.75, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -21.248146057128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0760374069213867, + "rewards_train/margins": 0.5581521987915039, + "rewards_train/rejected": -1.6341896057128906, + "step": 2349 + }, + { + "epoch": 0.66, + "logps_train/chosen": -49.00431823730469, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -18.75, + "logps_train/rejected": -60.50478744506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4004318714141846, + "rewards_train/margins": 0.7750470638275146, + "rewards_train/rejected": -4.175478935241699, + "step": 2349 + }, + { + "epoch": 0.66, + "learning_rate": 1.9620034125190643e-08, + "loss": 0.4017, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -90.02108001708984, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -86.95661926269531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5521080493927002, + "rewards_train/margins": -0.056446075439453125, + "rewards_train/rejected": -1.495661973953247, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -41.05466079711914, + "logps_train/ref_chosen": -15.375, + "logps_train/ref_rejected": -52.25, + "logps_train/rejected": -72.37797546386719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5679662227630615, + "rewards_train/margins": -0.555168628692627, + "rewards_train/rejected": -2.0127975940704346, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -15.3399076461792, + "logps_train/ref_chosen": -0.78515625, + "logps_train/ref_rejected": -0.78515625, + "logps_train/rejected": -15.543047904968262, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4554752111434937, + "rewards_train/margins": 0.02031397819519043, + "rewards_train/rejected": -1.475789189338684, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -27.38506507873535, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -72.0511474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.051006555557251, + "rewards_train/margins": 2.6041083335876465, + "rewards_train/rejected": -3.6551148891448975, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -104.12306213378906, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -236.96759033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7623062133789062, + "rewards_train/margins": 7.434453010559082, + "rewards_train/rejected": -8.196759223937988, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -176.78773498535156, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -173.13137817382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.178773403167725, + "rewards_train/margins": 1.084364414215088, + "rewards_train/rejected": -5.2631378173828125, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -141.9852294921875, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -25.889347076416016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6485230922698975, + "rewards_train/margins": -1.922088384628296, + "rewards_train/rejected": -0.7264347076416016, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -161.7008514404297, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -241.43402099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.570085048675537, + "rewards_train/margins": 6.223317623138428, + "rewards_train/rejected": -12.793402671813965, + "step": 2350 + }, + { + "epoch": 0.66, + "logps_train/chosen": -15.314421653747559, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -47.9759635925293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2939422130584717, + "rewards_train/margins": 2.116154193878174, + "rewards_train/rejected": -3.4100964069366455, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -13.245292663574219, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -8.25, + "logps_train/rejected": -22.04485321044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43702927231788635, + "rewards_train/margins": 0.9424560964107513, + "rewards_train/rejected": -1.3794853687286377, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -20.60023307800293, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -78.3489990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.560023307800293, + "rewards_train/margins": 3.849876880645752, + "rewards_train/rejected": -4.409900188446045, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -183.84786987304688, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -235.94894409179688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.7347869873046875, + "rewards_train/margins": 2.6601076126098633, + "rewards_train/rejected": -8.39489459991455, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -48.45381164550781, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -74.7841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2578811645507812, + "rewards_train/margins": 2.626786708831787, + "rewards_train/rejected": -5.884667873382568, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -135.87591552734375, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -314.76824951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.387591600418091, + "rewards_train/margins": 12.589233160018921, + "rewards_train/rejected": -15.976824760437012, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -34.920494079589844, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -19.039447784423828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5732994079589844, + "rewards_train/margins": -1.2037296295166016, + "rewards_train/rejected": -1.3695697784423828, + "step": 2351 + }, + { + "epoch": 0.66, + "logps_train/chosen": -142.37033081054688, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -178.77767944335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.937033176422119, + "rewards_train/margins": -1.05926513671875, + "rewards_train/rejected": -2.877768039703369, + "step": 2351 + }, + { + "epoch": 0.66, + "learning_rate": 1.91019822549503e-08, + "loss": 0.5162, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -21.23522186279297, + "logps_train/ref_chosen": -22.625, + "logps_train/ref_rejected": -34.25, + "logps_train/rejected": -50.09492111206055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1389778107404709, + "rewards_train/margins": 1.7234698981046677, + "rewards_train/rejected": -1.5844920873641968, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -5.220886707305908, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -0.431640625, + "logps_train/rejected": -14.743782997131348, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.26896366477012634, + "rewards_train/margins": 1.1622505486011505, + "rewards_train/rejected": -1.4312142133712769, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -283.7851257324219, + "logps_train/ref_chosen": -216.0, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -255.39549255371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.778512477874756, + "rewards_train/margins": 0.7610368728637695, + "rewards_train/rejected": -7.539549350738525, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -155.5851593017578, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -154.5674285888672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.108516216278076, + "rewards_train/margins": -0.10177326202392578, + "rewards_train/rejected": -5.00674295425415, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -88.54241943359375, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -190.64596557617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0542420148849487, + "rewards_train/margins": 2.410354495048523, + "rewards_train/rejected": -3.4645965099334717, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -38.57234191894531, + "logps_train/ref_chosen": -15.25, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -48.73077392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3322341442108154, + "rewards_train/margins": 0.5658433437347412, + "rewards_train/rejected": -2.8980774879455566, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -79.04367065429688, + "logps_train/ref_chosen": -39.0, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -46.437252044677734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.004367351531982, + "rewards_train/margins": -0.9981420040130615, + "rewards_train/rejected": -3.006225347518921, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -64.77190399169922, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -106.83468627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7021903991699219, + "rewards_train/margins": 1.0812782049179077, + "rewards_train/rejected": -1.7834686040878296, + "step": 2352 + }, + { + "epoch": 0.66, + "logps_train/chosen": -130.3089141845703, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -178.6373748779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4308913946151733, + "rewards_train/margins": 5.082846283912659, + "rewards_train/rejected": -6.513737678527832, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -24.68638038635254, + "logps_train/ref_chosen": -12.375, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -23.08194351196289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2311381101608276, + "rewards_train/margins": -0.14169371128082275, + "rewards_train/rejected": -1.0894443988800049, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -9.770275115966797, + "logps_train/ref_chosen": -3.390625, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -26.31930160522461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6379650235176086, + "rewards_train/margins": 0.9877151846885681, + "rewards_train/rejected": -1.6256802082061768, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.394947052001953, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -2.984375, + "logps_train/rejected": -6.05742883682251, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2707446813583374, + "rewards_train/margins": -0.9634392857551575, + "rewards_train/rejected": -0.30730539560317993, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -65.51497650146484, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -30.625, + "logps_train/rejected": -76.74161529541016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3514976501464844, + "rewards_train/margins": 1.2601637840270996, + "rewards_train/rejected": -4.611661434173584, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -90.27571105957031, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -87.0, + "logps_train/rejected": -88.30009460449219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3275711238384247, + "rewards_train/margins": -0.19756166636943817, + "rewards_train/rejected": -0.1300094574689865, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -12.635921478271484, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -1.296875, + "logps_train/rejected": -5.92690372467041, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6417171359062195, + "rewards_train/margins": -0.17871424555778503, + "rewards_train/rejected": -0.46300289034843445, + "step": 2353 + }, + { + "epoch": 0.66, + "logps_train/chosen": -101.2378921508789, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -113.89070129394531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.023789167404175, + "rewards_train/margins": 0.9152810573577881, + "rewards_train/rejected": -2.939070224761963, + "step": 2353 + }, + { + "epoch": 0.66, + "learning_rate": 1.859079563084609e-08, + "loss": 0.5156, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.657882690429688, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -10.225417137145996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6220383048057556, + "rewards_train/margins": 0.06456589698791504, + "rewards_train/rejected": -0.6866042017936707, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -221.54888916015625, + "logps_train/ref_chosen": -206.0, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -195.10020446777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5548889636993408, + "rewards_train/margins": 2.455131769180298, + "rewards_train/rejected": -4.010020732879639, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -33.743873596191406, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -9.625, + "logps_train/rejected": -43.01747512817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.561887502670288, + "rewards_train/margins": 0.7773599624633789, + "rewards_train/rejected": -3.339247465133667, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -32.01461410522461, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -58.92826843261719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2014615535736084, + "rewards_train/margins": 1.5663652420043945, + "rewards_train/rejected": -3.767826795578003, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -27.968547821044922, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -79.11724090576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.25310480594635, + "rewards_train/margins": 2.6086193323135376, + "rewards_train/rejected": -3.8617241382598877, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -64.7168960571289, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -180.1023712158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7466896176338196, + "rewards_train/margins": 6.313547790050507, + "rewards_train/rejected": -7.060237407684326, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -205.76138305664062, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -206.19619750976562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.2761383056640625, + "rewards_train/margins": -0.45651865005493164, + "rewards_train/rejected": -6.819619655609131, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -24.25106430053711, + "logps_train/ref_chosen": -5.875, + "logps_train/ref_rejected": -3.421875, + "logps_train/rejected": -15.87070369720459, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.837606430053711, + "rewards_train/margins": -0.5927234888076782, + "rewards_train/rejected": -1.2448829412460327, + "step": 2354 + }, + { + "epoch": 0.66, + "logps_train/chosen": -88.27062225341797, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -144.89590454101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6270622611045837, + "rewards_train/margins": 3.7125282883644104, + "rewards_train/rejected": -4.339590549468994, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -148.73321533203125, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -210.2943115234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.373321533203125, + "rewards_train/margins": 5.856109619140625, + "rewards_train/rejected": -7.22943115234375, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -19.641700744628906, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -13.5625, + "logps_train/rejected": -24.570919036865234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5547951459884644, + "rewards_train/margins": -0.45395326614379883, + "rewards_train/rejected": -1.1008418798446655, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -4.06873083114624, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -8.701850891113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07874808460474014, + "rewards_train/margins": 0.0195620059967041, + "rewards_train/rejected": -0.09831009060144424, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -121.39934539794922, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -119.24243927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.039934515953064, + "rewards_train/margins": 0.23430943489074707, + "rewards_train/rejected": -1.274243950843811, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -99.25704956054688, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -129.1790313720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9757049083709717, + "rewards_train/margins": 3.592198133468628, + "rewards_train/rejected": -6.5679030418396, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -114.17863464355469, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -179.7486572265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8678635358810425, + "rewards_train/margins": 4.207002282142639, + "rewards_train/rejected": -6.074865818023682, + "step": 2355 + }, + { + "epoch": 0.66, + "logps_train/chosen": -134.37840270996094, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -178.14181518554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2878403663635254, + "rewards_train/margins": 4.576341152191162, + "rewards_train/rejected": -6.8641815185546875, + "step": 2355 + }, + { + "epoch": 0.66, + "learning_rate": 1.8086477830642212e-08, + "loss": 0.3536, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -7.401618480682373, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -30.12248420715332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.06608815491199493, + "rewards_train/margins": 1.803336575627327, + "rewards_train/rejected": -1.737248420715332, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -87.3689193725586, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -187.6119384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2868919372558594, + "rewards_train/margins": 1.0743019580841064, + "rewards_train/rejected": -2.361193895339966, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -46.72854995727539, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -87.37371826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0353550910949707, + "rewards_train/margins": 3.6020169258117676, + "rewards_train/rejected": -6.637372016906738, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -139.54379272460938, + "logps_train/ref_chosen": -121.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -161.54678344726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8043793439865112, + "rewards_train/margins": 2.8502990007400513, + "rewards_train/rejected": -4.6546783447265625, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -0.3099299967288971, + "logps_train/ref_chosen": -0.61328125, + "logps_train/ref_rejected": -4.125, + "logps_train/rejected": -7.058773994445801, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.03033512644469738, + "rewards_train/margins": 0.32371252588927746, + "rewards_train/rejected": -0.2933773994445801, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -14.374459266662598, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -8.875, + "logps_train/rejected": -15.235345840454102, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18755407631397247, + "rewards_train/margins": 0.8235886842012405, + "rewards_train/rejected": -0.6360346078872681, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -57.66395568847656, + "logps_train/ref_chosen": -40.0, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -56.07353210449219, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7663955688476562, + "rewards_train/margins": -0.1590423583984375, + "rewards_train/rejected": -1.6073532104492188, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -13.782002449035645, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -26.062877655029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6438252329826355, + "rewards_train/margins": 1.012462556362152, + "rewards_train/rejected": -1.6562877893447876, + "step": 2356 + }, + { + "epoch": 0.66, + "logps_train/chosen": -4.885894775390625, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -0.68359375, + "logps_train/rejected": -1.544430136680603, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.279214471578598, + "rewards_train/margins": -0.19313082844018936, + "rewards_train/rejected": -0.08608364313840866, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -121.77961730957031, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -201.60397338867188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2779617309570312, + "rewards_train/margins": 5.332435607910156, + "rewards_train/rejected": -7.6103973388671875, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -84.89349365234375, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -179.24974060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2893494367599487, + "rewards_train/margins": 5.035624623298645, + "rewards_train/rejected": -6.324974060058594, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -10.767782211303711, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -26.960512161254883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35802823305130005, + "rewards_train/margins": 1.2130230069160461, + "rewards_train/rejected": -1.5710512399673462, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -36.60457229614258, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -10.5, + "logps_train/rejected": -24.872827529907227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.1042072772979736, + "rewards_train/margins": -0.6669244766235352, + "rewards_train/rejected": -1.4372828006744385, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -13.491592407226562, + "logps_train/ref_chosen": -11.5625, + "logps_train/ref_rejected": -11.5625, + "logps_train/rejected": -13.411153793334961, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.19290924072265625, + "rewards_train/margins": -0.008043855428695679, + "rewards_train/rejected": -0.18486538529396057, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -80.60502624511719, + "logps_train/ref_chosen": -63.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -93.83555603027344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7605026960372925, + "rewards_train/margins": -0.7269470691680908, + "rewards_train/rejected": -1.0335556268692017, + "step": 2357 + }, + { + "epoch": 0.66, + "logps_train/chosen": -94.07930755615234, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -289.4517517089844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.157930850982666, + "rewards_train/margins": 12.187244892120361, + "rewards_train/rejected": -14.345175743103027, + "step": 2357 + }, + { + "epoch": 0.66, + "learning_rate": 1.7589032384028757e-08, + "loss": 0.4056, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -126.90982818603516, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -220.19430541992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3909828662872314, + "rewards_train/margins": 5.828448057174683, + "rewards_train/rejected": -9.219430923461914, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -1.4104633331298828, + "logps_train/ref_chosen": -0.310546875, + "logps_train/ref_rejected": -0.310546875, + "logps_train/rejected": -1.548000454902649, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1099916473031044, + "rewards_train/margins": 0.013753712177276611, + "rewards_train/rejected": -0.12374535948038101, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -191.96286010742188, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -234.47984313964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.94628620147705, + "rewards_train/margins": 1.5516986846923828, + "rewards_train/rejected": -11.497984886169434, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -8.270856857299805, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -35.75, + "logps_train/rejected": -63.274620056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27708569169044495, + "rewards_train/margins": 2.475376456975937, + "rewards_train/rejected": -2.752462148666382, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -6.361278533935547, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -32.689186096191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4712841212749481, + "rewards_train/margins": 1.2663845121860504, + "rewards_train/rejected": -1.7376686334609985, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -74.50132751464844, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -33.75, + "logps_train/rejected": -95.40847778320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1251327991485596, + "rewards_train/margins": 3.040714979171753, + "rewards_train/rejected": -6.1658477783203125, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -227.2353057861328, + "logps_train/ref_chosen": -167.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -278.43157958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.02353048324585, + "rewards_train/margins": 4.519628047943115, + "rewards_train/rejected": -10.543158531188965, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -22.0350399017334, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -162.7130126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9285039901733398, + "rewards_train/margins": 2.59279727935791, + "rewards_train/rejected": -4.52130126953125, + "step": 2358 + }, + { + "epoch": 0.66, + "logps_train/chosen": -21.11518096923828, + "logps_train/ref_chosen": -11.25, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -36.1711311340332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9865180850028992, + "rewards_train/margins": 1.0930951237678528, + "rewards_train/rejected": -2.079613208770752, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.39193344116211, + "logps_train/ref_chosen": -12.75, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -45.462989807128906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46419334411621094, + "rewards_train/margins": 3.0821056365966797, + "rewards_train/rejected": -3.5462989807128906, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -97.99659729003906, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -134.5804901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0996596813201904, + "rewards_train/margins": 1.3583896160125732, + "rewards_train/rejected": -4.458049297332764, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -70.66148376464844, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -171.05865478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2661483883857727, + "rewards_train/margins": 6.139717280864716, + "rewards_train/rejected": -6.405865669250488, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -124.08338928222656, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -25.934429168701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5583388805389404, + "rewards_train/margins": -0.4273958206176758, + "rewards_train/rejected": -2.1309430599212646, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -152.46192932128906, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -251.0424041748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7461929321289062, + "rewards_train/margins": 9.158047676086426, + "rewards_train/rejected": -11.904240608215332, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -28.71611785888672, + "logps_train/ref_chosen": -16.625, + "logps_train/ref_rejected": -13.625, + "logps_train/rejected": -60.84218215942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2091118097305298, + "rewards_train/margins": 3.5126065015792847, + "rewards_train/rejected": -4.7217183113098145, + "step": 2359 + }, + { + "epoch": 0.66, + "logps_train/chosen": -94.37300872802734, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -227.36080932617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.787300944328308, + "rewards_train/margins": 8.998780369758606, + "rewards_train/rejected": -10.786081314086914, + "step": 2359 + }, + { + "epoch": 0.66, + "learning_rate": 1.70984627725963e-08, + "loss": 0.1787, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -0.009442456997931004, + "logps_train/ref_chosen": -0.08056640625, + "logps_train/ref_rejected": -3.828125, + "logps_train/rejected": -6.759148120880127, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.007112395018339157, + "rewards_train/margins": 0.3002147190272808, + "rewards_train/rejected": -0.29310232400894165, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -3.89801287651062, + "logps_train/ref_chosen": -2.59375, + "logps_train/ref_rejected": -30.25, + "logps_train/rejected": -35.50738525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.130426287651062, + "rewards_train/margins": 0.39531224966049194, + "rewards_train/rejected": -0.525738537311554, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -160.02798461914062, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -219.76443481445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.002798557281494, + "rewards_train/margins": 4.623645305633545, + "rewards_train/rejected": -9.626443862915039, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -64.3498764038086, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -93.36689758300781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11501236259937286, + "rewards_train/margins": 1.751702144742012, + "rewards_train/rejected": -1.6366897821426392, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -51.506309509277344, + "logps_train/ref_chosen": -36.25, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -94.7625503540039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5256309509277344, + "rewards_train/margins": 1.050624132156372, + "rewards_train/rejected": -2.5762550830841064, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -222.40219116210938, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -193.150390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.840219020843506, + "rewards_train/margins": 0.27482032775878906, + "rewards_train/rejected": -7.115039348602295, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -176.81768798828125, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -274.65374755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.681768894195557, + "rewards_train/margins": 7.883606433868408, + "rewards_train/rejected": -13.565375328063965, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -7.922698497772217, + "logps_train/ref_chosen": -5.375, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -54.48680877685547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25476986169815063, + "rewards_train/margins": 2.4814111590385437, + "rewards_train/rejected": -2.7361810207366943, + "step": 2360 + }, + { + "epoch": 0.66, + "logps_train/chosen": -151.28382873535156, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -224.95986938476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.128382921218872, + "rewards_train/margins": 9.11760401725769, + "rewards_train/rejected": -11.245986938476562, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -172.05685424804688, + "logps_train/ref_chosen": -146.0, + "logps_train/ref_rejected": -256.0, + "logps_train/rejected": -358.9018249511719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6056854724884033, + "rewards_train/margins": 7.684497594833374, + "rewards_train/rejected": -10.290183067321777, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -28.855392456054688, + "logps_train/ref_chosen": -4.40625, + "logps_train/ref_rejected": -16.0, + "logps_train/rejected": -41.84764099121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4449143409729004, + "rewards_train/margins": 0.13984990119934082, + "rewards_train/rejected": -2.584764242172241, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -112.3600082397461, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -114.56085205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2860008180141449, + "rewards_train/margins": 0.020084381103515625, + "rewards_train/rejected": -0.3060851991176605, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -16.666345596313477, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -59.362667083740234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37913456559181213, + "rewards_train/margins": 4.682132333517075, + "rewards_train/rejected": -5.061266899108887, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -154.25819396972656, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -87.70448303222656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.025819301605225, + "rewards_train/margins": -3.1053709983825684, + "rewards_train/rejected": -0.9204483032226562, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -46.55048370361328, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -45.23844528198242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3050484657287598, + "rewards_train/margins": 0.5312960147857666, + "rewards_train/rejected": -3.8363444805145264, + "step": 2361 + }, + { + "epoch": 0.66, + "logps_train/chosen": -35.333892822265625, + "logps_train/ref_chosen": -5.25, + "logps_train/ref_rejected": -10.1875, + "logps_train/rejected": -39.982337951660156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0083892345428467, + "rewards_train/margins": -0.028905391693115234, + "rewards_train/rejected": -2.9794838428497314, + "step": 2361 + }, + { + "epoch": 0.66, + "learning_rate": 1.661477242981202e-08, + "loss": 0.4889, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -151.0234832763672, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -115.90247344970703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.852348327636719, + "rewards_train/margins": -0.812100887298584, + "rewards_train/rejected": -4.040247440338135, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.45496368408203, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -11.25, + "logps_train/rejected": -34.218894958496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7892463803291321, + "rewards_train/margins": 1.5076431632041931, + "rewards_train/rejected": -2.296889543533325, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -7.459085464477539, + "logps_train/ref_chosen": -3.75, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -24.957477569580078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37090855836868286, + "rewards_train/margins": 1.206089198589325, + "rewards_train/rejected": -1.5769977569580078, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -25.086942672729492, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -32.71125411987305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7711942791938782, + "rewards_train/margins": 2.049931228160858, + "rewards_train/rejected": -2.8211255073547363, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -14.938447952270508, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -22.25, + "logps_train/rejected": -32.74467849731445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5500947833061218, + "rewards_train/margins": 0.4993731379508972, + "rewards_train/rejected": -1.049467921257019, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -79.54206085205078, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -141.65997314453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20420609414577484, + "rewards_train/margins": 2.2117911726236343, + "rewards_train/rejected": -2.415997266769409, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -203.64356994628906, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -251.90513610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.014357566833496, + "rewards_train/margins": 2.6761560440063477, + "rewards_train/rejected": -10.690513610839844, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -159.02444458007812, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -247.66705322265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3024444580078125, + "rewards_train/margins": 8.36426067352295, + "rewards_train/rejected": -10.666705131530762, + "step": 2362 + }, + { + "epoch": 0.66, + "logps_train/chosen": -159.41098022460938, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -233.3951873779297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1410980224609375, + "rewards_train/margins": 11.448420524597168, + "rewards_train/rejected": -13.589518547058105, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -41.947654724121094, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -22.875, + "logps_train/rejected": -25.71536636352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21976546943187714, + "rewards_train/margins": 0.06427116692066193, + "rewards_train/rejected": -0.28403663635253906, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -104.09661102294922, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -169.282958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5596611499786377, + "rewards_train/margins": 2.8186347484588623, + "rewards_train/rejected": -6.3782958984375, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.65545654296875, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -7.71875, + "logps_train/rejected": -26.867290496826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2342957258224487, + "rewards_train/margins": 0.6805583238601685, + "rewards_train/rejected": -1.9148540496826172, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -1.2071696519851685, + "logps_train/ref_chosen": -1.4921875, + "logps_train/ref_rejected": -1.6640625, + "logps_train/rejected": -3.665459394454956, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.028501784428954124, + "rewards_train/margins": 0.2286414708942175, + "rewards_train/rejected": -0.20013968646526337, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.184730529785156, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -3.890625, + "logps_train/rejected": -45.16374588012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8184730410575867, + "rewards_train/margins": 3.3088391423225403, + "rewards_train/rejected": -4.127312183380127, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -37.15239334106445, + "logps_train/ref_chosen": -13.6875, + "logps_train/ref_rejected": -7.65625, + "logps_train/rejected": -49.09027862548828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.346489429473877, + "rewards_train/margins": 1.7969136238098145, + "rewards_train/rejected": -4.143403053283691, + "step": 2363 + }, + { + "epoch": 0.66, + "logps_train/chosen": -32.67881774902344, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -14.609823226928711, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9741318225860596, + "rewards_train/margins": -0.9975244998931885, + "rewards_train/rejected": -0.9766073226928711, + "step": 2363 + }, + { + "epoch": 0.66, + "learning_rate": 1.613796474099549e-08, + "loss": 0.3513, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -3.727968215942383, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -28.375, + "logps_train/rejected": -32.67288589477539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09935932606458664, + "rewards_train/margins": 0.3304292634129524, + "rewards_train/rejected": -0.42978858947753906, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -147.260009765625, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -105.0904769897461, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.476001024246216, + "rewards_train/margins": -0.36695337295532227, + "rewards_train/rejected": -2.1090476512908936, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -249.7540283203125, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -258.97833251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.575402736663818, + "rewards_train/margins": 2.72243070602417, + "rewards_train/rejected": -9.297833442687988, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -146.9033203125, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -178.1732177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.09033203125, + "rewards_train/margins": 3.1269898414611816, + "rewards_train/rejected": -6.217321872711182, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -64.95062255859375, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -9.0, + "logps_train/rejected": -47.18526077270508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.457562446594238, + "rewards_train/margins": -0.6390364170074463, + "rewards_train/rejected": -3.818526029586792, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -107.58509826660156, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -65.5, + "logps_train/rejected": -94.92875671386719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.958509922027588, + "rewards_train/margins": -0.01563429832458496, + "rewards_train/rejected": -2.942875623703003, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -10.377584457397461, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -3.578125, + "logps_train/rejected": -21.514162063598633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1940084546804428, + "rewards_train/margins": 1.5995953232049942, + "rewards_train/rejected": -1.793603777885437, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -5.69325065612793, + "logps_train/ref_chosen": -3.765625, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -29.74136734008789, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1927625685930252, + "rewards_train/margins": 2.225124165415764, + "rewards_train/rejected": -2.417886734008789, + "step": 2364 + }, + { + "epoch": 0.66, + "logps_train/chosen": -102.56050109863281, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -102.49893188476562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6060501337051392, + "rewards_train/margins": -0.10615694522857666, + "rewards_train/rejected": -1.4998931884765625, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -12.929152488708496, + "logps_train/ref_chosen": -4.1875, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -24.33641815185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8741652369499207, + "rewards_train/margins": 0.703226625919342, + "rewards_train/rejected": -1.5773918628692627, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -105.37335205078125, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -28.875, + "logps_train/rejected": -63.53369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5873353481292725, + "rewards_train/margins": 0.8785338401794434, + "rewards_train/rejected": -3.465869188308716, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -56.70815658569336, + "logps_train/ref_chosen": -45.5, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -78.01618957519531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.120815634727478, + "rewards_train/margins": -0.36919665336608887, + "rewards_train/rejected": -0.7516189813613892, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -188.502197265625, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -282.6422424316406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.450219631195068, + "rewards_train/margins": 2.614004611968994, + "rewards_train/rejected": -9.064224243164062, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -94.54866027832031, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -107.47206115722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3451339900493622, + "rewards_train/margins": 0.8423401117324829, + "rewards_train/rejected": -0.4972061216831207, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -6.1181488037109375, + "logps_train/ref_chosen": -5.5625, + "logps_train/ref_rejected": -3.6875, + "logps_train/rejected": -15.232550621032715, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05556488037109375, + "rewards_train/margins": 1.0989402532577515, + "rewards_train/rejected": -1.1545051336288452, + "step": 2365 + }, + { + "epoch": 0.66, + "logps_train/chosen": -165.50799560546875, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -178.52120971679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.700799465179443, + "rewards_train/margins": 2.751321792602539, + "rewards_train/rejected": -7.452121257781982, + "step": 2365 + }, + { + "epoch": 0.66, + "learning_rate": 1.5668043043295052e-08, + "loss": 0.4228, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -32.08568572998047, + "logps_train/ref_chosen": -14.125, + "logps_train/ref_rejected": -2.859375, + "logps_train/rejected": -26.62024688720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.796068549156189, + "rewards_train/margins": 0.5800186395645142, + "rewards_train/rejected": -2.376087188720703, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -1.0870792865753174, + "logps_train/ref_chosen": -1.171875, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -8.159242630004883, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.008479571901261806, + "rewards_train/margins": 0.22127883788198233, + "rewards_train/rejected": -0.21279926598072052, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -1.6018809080123901, + "logps_train/ref_chosen": -0.310546875, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -11.075621604919434, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.129133403301239, + "rewards_train/margins": 0.15967875719070435, + "rewards_train/rejected": -0.28881216049194336, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -14.929779052734375, + "logps_train/ref_chosen": -4.71875, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -49.93946838378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0211029052734375, + "rewards_train/margins": 1.4978439807891846, + "rewards_train/rejected": -2.518946886062622, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -27.218109130859375, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -3.015625, + "logps_train/rejected": -21.971330642700195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.435873508453369, + "rewards_train/margins": -0.5403028726577759, + "rewards_train/rejected": -1.8955706357955933, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -20.384387969970703, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -40.30511474609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21343879401683807, + "rewards_train/margins": 1.6045727282762527, + "rewards_train/rejected": -1.8180115222930908, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -15.185419082641602, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -14.891706466674805, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.151354432106018, + "rewards_train/margins": -0.08093380928039551, + "rewards_train/rejected": -1.0704206228256226, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -35.35677719116211, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -30.630460739135742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.485677719116211, + "rewards_train/margins": 1.2414309978485107, + "rewards_train/rejected": -2.7271087169647217, + "step": 2366 + }, + { + "epoch": 0.66, + "logps_train/chosen": -124.15199279785156, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -150.8953094482422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4151992797851562, + "rewards_train/margins": 1.7243318557739258, + "rewards_train/rejected": -4.139531135559082, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -127.56658935546875, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -124.48849487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.656658887863159, + "rewards_train/margins": 2.2421905994415283, + "rewards_train/rejected": -4.8988494873046875, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -41.18699264526367, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -19.625, + "logps_train/rejected": -71.05113983154297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.368699312210083, + "rewards_train/margins": 3.7739145755767822, + "rewards_train/rejected": -5.142613887786865, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -11.881516456604004, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -35.0, + "logps_train/rejected": -50.502784729003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3069016635417938, + "rewards_train/margins": 1.2433768808841705, + "rewards_train/rejected": -1.5502785444259644, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -25.109159469604492, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -21.831817626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7890409231185913, + "rewards_train/margins": 0.10664081573486328, + "rewards_train/rejected": -1.8956817388534546, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -22.024776458740234, + "logps_train/ref_chosen": -17.125, + "logps_train/ref_rejected": -6.0625, + "logps_train/rejected": -19.190738677978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4899776577949524, + "rewards_train/margins": 0.8228462338447571, + "rewards_train/rejected": -1.3128238916397095, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -68.39610290527344, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -89.20375061035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.36038970947265625, + "rewards_train/margins": 1.4807647466659546, + "rewards_train/rejected": -1.1203750371932983, + "step": 2367 + }, + { + "epoch": 0.66, + "logps_train/chosen": -22.099884033203125, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -3.546875, + "logps_train/rejected": -19.888702392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1349884271621704, + "rewards_train/margins": 0.4991943836212158, + "rewards_train/rejected": -1.6341828107833862, + "step": 2367 + }, + { + "epoch": 0.66, + "learning_rate": 1.5205010625664373e-08, + "loss": 0.3907, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -111.32025146484375, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -206.85903930664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3820252418518066, + "rewards_train/margins": 3.8038787841796875, + "rewards_train/rejected": -6.185904026031494, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -13.496771812438965, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -20.17230224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7090522050857544, + "rewards_train/margins": 0.5550529956817627, + "rewards_train/rejected": -1.264105200767517, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -106.34687805175781, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -119.74333190917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2846878170967102, + "rewards_train/margins": 2.93964546918869, + "rewards_train/rejected": -3.2243332862854004, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.329038619995117, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -2.296875, + "logps_train/rejected": -12.854581832885742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5454038977622986, + "rewards_train/margins": 0.5103668570518494, + "rewards_train/rejected": -1.055770754814148, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -4.460578918457031, + "logps_train/ref_chosen": -2.703125, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -10.446431159973145, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1757453978061676, + "rewards_train/margins": 0.22514772415161133, + "rewards_train/rejected": -0.40089312195777893, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -228.28872680664062, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -209.17251586914062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.728873252868652, + "rewards_train/margins": -0.3616218566894531, + "rewards_train/rejected": -10.3672513961792, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -238.62490844726562, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -252.1302947998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.962491035461426, + "rewards_train/margins": 1.8505382537841797, + "rewards_train/rejected": -12.813029289245605, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -133.33758544921875, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -208.81045532226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.733758568763733, + "rewards_train/margins": 6.94728696346283, + "rewards_train/rejected": -8.681045532226562, + "step": 2368 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.558671951293945, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -17.50983238220215, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6613359451293945, + "rewards_train/margins": -0.4884777069091797, + "rewards_train/rejected": -1.1728582382202148, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.904985427856445, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -1.9453125, + "logps_train/rejected": -18.60215187072754, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.484248548746109, + "rewards_train/margins": 1.1814354360103607, + "rewards_train/rejected": -1.6656839847564697, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -94.2663803100586, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -12.125, + "logps_train/rejected": -72.21947479248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5766379833221436, + "rewards_train/margins": 3.432809591293335, + "rewards_train/rejected": -6.0094475746154785, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -33.38687515258789, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -26.75, + "logps_train/rejected": -37.41502380371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16368751227855682, + "rewards_train/margins": 0.9028149396181107, + "rewards_train/rejected": -1.0665024518966675, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -167.31979370117188, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -186.738525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0319793224334717, + "rewards_train/margins": 1.7418732643127441, + "rewards_train/rejected": -3.773852586746216, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -134.7540283203125, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -233.56149291992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.47540283203125, + "rewards_train/margins": 5.480746269226074, + "rewards_train/rejected": -8.956149101257324, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -3.578176259994507, + "logps_train/ref_chosen": -3.1875, + "logps_train/ref_rejected": -1.609375, + "logps_train/rejected": -1.4131375551223755, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.039067625999450684, + "rewards_train/margins": -0.058691371232271194, + "rewards_train/rejected": 0.01962374523282051, + "step": 2369 + }, + { + "epoch": 0.66, + "logps_train/chosen": -143.07266235351562, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -253.95431518554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0572662353515625, + "rewards_train/margins": 7.338165283203125, + "rewards_train/rejected": -9.395431518554688, + "step": 2369 + }, + { + "epoch": 0.66, + "learning_rate": 1.4748870728839346e-08, + "loss": 0.3199, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -20.104774475097656, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -9.1875, + "logps_train/rejected": -22.945634841918945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3917274475097656, + "rewards_train/margins": -0.015913963317871094, + "rewards_train/rejected": -1.3758134841918945, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -94.48054504394531, + "logps_train/ref_chosen": -62.0, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -184.1214599609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2480545043945312, + "rewards_train/margins": 3.9140915870666504, + "rewards_train/rejected": -7.162146091461182, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -216.72705078125, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -191.0, + "logps_train/rejected": -230.89166259765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.272705078125, + "rewards_train/margins": 0.716461181640625, + "rewards_train/rejected": -3.989166259765625, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -174.75343322753906, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -193.0, + "logps_train/rejected": -311.40655517578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3753433227539062, + "rewards_train/margins": 8.465312004089355, + "rewards_train/rejected": -11.840655326843262, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -60.22647476196289, + "logps_train/ref_chosen": -57.75, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -120.47476196289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2476474791765213, + "rewards_train/margins": 1.2998286932706833, + "rewards_train/rejected": -1.5474761724472046, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -215.45968627929688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -261.81024169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.445968627929688, + "rewards_train/margins": 6.085055351257324, + "rewards_train/rejected": -14.531023979187012, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -309.521728515625, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -280.7416076660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -13.552172660827637, + "rewards_train/margins": 0.3219881057739258, + "rewards_train/rejected": -13.874160766601562, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -40.25275802612305, + "logps_train/ref_chosen": -40.75, + "logps_train/ref_rejected": -44.75, + "logps_train/rejected": -57.53791809082031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04972419887781143, + "rewards_train/margins": 1.3285159841179848, + "rewards_train/rejected": -1.2787917852401733, + "step": 2370 + }, + { + "epoch": 0.66, + "logps_train/chosen": -105.84700775146484, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -155.09719848632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6847007274627686, + "rewards_train/margins": 2.2250192165374756, + "rewards_train/rejected": -4.909719944000244, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -51.218353271484375, + "logps_train/ref_chosen": -29.75, + "logps_train/ref_rejected": -23.875, + "logps_train/rejected": -50.95806121826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1468353271484375, + "rewards_train/margins": 0.5614707469940186, + "rewards_train/rejected": -2.708306074142456, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -59.82182312011719, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -40.25, + "logps_train/rejected": -73.3338394165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8321823477745056, + "rewards_train/margins": 2.476201593875885, + "rewards_train/rejected": -3.3083839416503906, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -159.04623413085938, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -197.61788940429688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7046234607696533, + "rewards_train/margins": 2.457165479660034, + "rewards_train/rejected": -6.1617889404296875, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -47.03385925292969, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -41.67408752441406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.578386068344116, + "rewards_train/margins": 0.47652268409729004, + "rewards_train/rejected": -3.0549087524414062, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -24.983062744140625, + "logps_train/ref_chosen": -20.75, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -28.72304344177246, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42330628633499146, + "rewards_train/margins": 1.3677480816841125, + "rewards_train/rejected": -1.791054368019104, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -125.86058807373047, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -125.69387817382812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.28605881333351135, + "rewards_train/margins": -0.01667100191116333, + "rewards_train/rejected": -0.269387811422348, + "step": 2371 + }, + { + "epoch": 0.66, + "logps_train/chosen": -28.730905532836914, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -3.390625, + "logps_train/rejected": -30.68888282775879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5730905532836914, + "rewards_train/margins": 1.1567351818084717, + "rewards_train/rejected": -2.729825735092163, + "step": 2371 + }, + { + "epoch": 0.66, + "learning_rate": 1.4299626545315691e-08, + "loss": 0.284, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -19.038806915283203, + "logps_train/ref_chosen": -6.5, + "logps_train/ref_rejected": -8.6875, + "logps_train/rejected": -31.972522735595703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2538807392120361, + "rewards_train/margins": 1.0746216773986816, + "rewards_train/rejected": -2.3285024166107178, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -152.55056762695312, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -184.21646118164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3550567626953125, + "rewards_train/margins": 5.56658935546875, + "rewards_train/rejected": -6.9216461181640625, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -125.12918853759766, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -133.22622680664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0629189014434814, + "rewards_train/margins": 0.5097038745880127, + "rewards_train/rejected": -2.572622776031494, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -28.402759552001953, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -0.83203125, + "logps_train/rejected": -10.827343940734863, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.065276026725769, + "rewards_train/margins": -0.06574475765228271, + "rewards_train/rejected": -0.9995312690734863, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -32.2132682800293, + "logps_train/ref_chosen": -28.75, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -34.34419250488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3463268280029297, + "rewards_train/margins": 1.6630923748016357, + "rewards_train/rejected": -2.0094192028045654, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -13.54150390625, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -53.45164108276367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4166503846645355, + "rewards_train/margins": 0.37851372361183167, + "rewards_train/rejected": -0.7951641082763672, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -114.33837890625, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -94.91370391845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16616211831569672, + "rewards_train/margins": 1.9075325578451157, + "rewards_train/rejected": -1.741370439529419, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -74.18851470947266, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -41.75, + "logps_train/rejected": -79.29646301269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8938515186309814, + "rewards_train/margins": 1.8607947826385498, + "rewards_train/rejected": -3.7546463012695312, + "step": 2372 + }, + { + "epoch": 0.66, + "logps_train/chosen": -111.88823699951172, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -196.7024688720703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1388237476348877, + "rewards_train/margins": 6.131423234939575, + "rewards_train/rejected": -7.270246982574463, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -52.794410705566406, + "logps_train/ref_chosen": -43.75, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -25.911151885986328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9044410586357117, + "rewards_train/margins": 1.071049153804779, + "rewards_train/rejected": -1.9754902124404907, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.32477378845215, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -15.283600807189941, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8887273669242859, + "rewards_train/margins": 0.14588278532028198, + "rewards_train/rejected": -1.0346101522445679, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -46.665748596191406, + "logps_train/ref_chosen": -17.375, + "logps_train/ref_rejected": -54.25, + "logps_train/rejected": -118.511962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.929075002670288, + "rewards_train/margins": 3.497121572494507, + "rewards_train/rejected": -6.426196575164795, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -119.82121276855469, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -219.666015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5821213126182556, + "rewards_train/margins": 2.88448029756546, + "rewards_train/rejected": -3.466601610183716, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -31.803211212158203, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -6.125, + "logps_train/rejected": -38.139137268066406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1553211212158203, + "rewards_train/margins": 1.0460927486419678, + "rewards_train/rejected": -3.201413869857788, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -97.24101257324219, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -133.22402954101562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7741012573242188, + "rewards_train/margins": 2.7983016967773438, + "rewards_train/rejected": -4.5724029541015625, + "step": 2373 + }, + { + "epoch": 0.66, + "logps_train/chosen": -110.05000305175781, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -147.80197143554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9050003290176392, + "rewards_train/margins": 3.1251968145370483, + "rewards_train/rejected": -4.0301971435546875, + "step": 2373 + }, + { + "epoch": 0.66, + "learning_rate": 1.3857281219326388e-08, + "loss": 0.2425, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -37.900245666503906, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -19.28070068359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.44002458453178406, + "rewards_train/margins": -0.024454504251480103, + "rewards_train/rejected": -0.41557008028030396, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -12.778165817260742, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -27.111358642578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0246915817260742, + "rewards_train/margins": 0.892694354057312, + "rewards_train/rejected": -1.9173859357833862, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -99.84098052978516, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -31.625, + "logps_train/rejected": -64.4506607055664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.8340983390808105, + "rewards_train/margins": -1.55153226852417, + "rewards_train/rejected": -3.2825660705566406, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -101.74679565429688, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -167.81271362304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5246796011924744, + "rewards_train/margins": 4.656591951847076, + "rewards_train/rejected": -5.181271553039551, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -37.39755630493164, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -55.318721771240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.72100567817688, + "rewards_train/margins": 0.385866641998291, + "rewards_train/rejected": -3.106872320175171, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -25.533462524414062, + "logps_train/ref_chosen": -5.84375, + "logps_train/ref_rejected": -1.953125, + "logps_train/rejected": -28.839061737060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9689712524414062, + "rewards_train/margins": 0.7196223735809326, + "rewards_train/rejected": -2.688593626022339, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -24.441871643066406, + "logps_train/ref_chosen": -11.125, + "logps_train/ref_rejected": -4.71875, + "logps_train/rejected": -16.998571395874023, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3316872119903564, + "rewards_train/margins": -0.10370504856109619, + "rewards_train/rejected": -1.2279821634292603, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -9.968392372131348, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -12.3125, + "logps_train/rejected": -19.074254989624023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.153089240193367, + "rewards_train/margins": 0.5230862945318222, + "rewards_train/rejected": -0.6761755347251892, + "step": 2374 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.444562911987305, + "logps_train/ref_chosen": -3.875, + "logps_train/ref_rejected": -6.5, + "logps_train/rejected": -27.951644897460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3569563627243042, + "rewards_train/margins": 0.7882081270217896, + "rewards_train/rejected": -2.1451644897460938, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -9.550118446350098, + "logps_train/ref_chosen": -1.0625, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -26.39583396911621, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8487618565559387, + "rewards_train/margins": 1.297071635723114, + "rewards_train/rejected": -2.1458334922790527, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -70.21343231201172, + "logps_train/ref_chosen": -59.0, + "logps_train/ref_rejected": -60.75, + "logps_train/rejected": -99.26858520507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1213432550430298, + "rewards_train/margins": 2.7305153608322144, + "rewards_train/rejected": -3.851858615875244, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -129.73191833496094, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -106.654052734375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4731918573379517, + "rewards_train/margins": -1.3077865839004517, + "rewards_train/rejected": -0.1654052734375, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -23.71562385559082, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -13.875, + "logps_train/rejected": -25.697750091552734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.46593761444091797, + "rewards_train/margins": 1.6482126712799072, + "rewards_train/rejected": -1.1822750568389893, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -9.800737380981445, + "logps_train/ref_chosen": -10.8125, + "logps_train/ref_rejected": -1.453125, + "logps_train/rejected": -29.767377853393555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10117626190185547, + "rewards_train/margins": 2.9326016902923584, + "rewards_train/rejected": -2.831425428390503, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.3091983795166, + "logps_train/ref_chosen": -18.0, + "logps_train/ref_rejected": -54.75, + "logps_train/rejected": -85.86109161376953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0690801665186882, + "rewards_train/margins": 3.180189423263073, + "rewards_train/rejected": -3.1111092567443848, + "step": 2375 + }, + { + "epoch": 0.66, + "logps_train/chosen": -141.02481079101562, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -157.2178955078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6524810791015625, + "rewards_train/margins": 0.6693084239959717, + "rewards_train/rejected": -2.321789503097534, + "step": 2375 + }, + { + "epoch": 0.66, + "learning_rate": 1.3421837846819717e-08, + "loss": 0.4899, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -19.203685760498047, + "logps_train/ref_chosen": -8.875, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -26.822786331176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0328686237335205, + "rewards_train/margins": 1.0869100093841553, + "rewards_train/rejected": -2.119778633117676, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -26.106935501098633, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -48.75, + "logps_train/rejected": -45.55763626098633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.21069355309009552, + "rewards_train/margins": -0.5299299210309982, + "rewards_train/rejected": 0.3192363679409027, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -149.47613525390625, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -130.73150634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.447613477706909, + "rewards_train/margins": 1.6255371570587158, + "rewards_train/rejected": -4.073150634765625, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -17.238605499267578, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -12.930326461791992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6801105737686157, + "rewards_train/margins": -0.17457789182662964, + "rewards_train/rejected": -0.5055326819419861, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -103.6976318359375, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -104.2911376953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4697632789611816, + "rewards_train/margins": 0.05935049057006836, + "rewards_train/rejected": -2.52911376953125, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -84.23619842529297, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -84.02628326416016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1236199140548706, + "rewards_train/margins": -0.02099156379699707, + "rewards_train/rejected": -1.1026283502578735, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -127.6468276977539, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -259.36529541015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6646827459335327, + "rewards_train/margins": 8.571846604347229, + "rewards_train/rejected": -10.236529350280762, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -54.94322967529297, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -48.046756744384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2943229675292969, + "rewards_train/margins": 2.5666027069091797, + "rewards_train/rejected": -3.8609256744384766, + "step": 2376 + }, + { + "epoch": 0.66, + "logps_train/chosen": -83.17660522460938, + "logps_train/ref_chosen": -53.5, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -55.49834060668945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.967660665512085, + "rewards_train/margins": -1.042826533317566, + "rewards_train/rejected": -1.924834132194519, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -104.8753433227539, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -164.41192626953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4375343322753906, + "rewards_train/margins": 1.0036582946777344, + "rewards_train/rejected": -1.441192626953125, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -225.63839721679688, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -212.06480407714844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.463840007781982, + "rewards_train/margins": 0.542640209197998, + "rewards_train/rejected": -8.00648021697998, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -20.705890655517578, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -58.64875793457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7830891013145447, + "rewards_train/margins": 3.0817866921424866, + "rewards_train/rejected": -3.8648757934570312, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -111.87979888916016, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -148.24758911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16202011704444885, + "rewards_train/margins": 2.986779123544693, + "rewards_train/rejected": -2.824759006500244, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -23.263843536376953, + "logps_train/ref_chosen": -24.25, + "logps_train/ref_rejected": -4.28125, + "logps_train/rejected": -13.694404602050781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09861564636230469, + "rewards_train/margins": 1.0399311184883118, + "rewards_train/rejected": -0.9413154721260071, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -47.08515548706055, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -37.5, + "logps_train/rejected": -62.63501739501953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7085156440734863, + "rewards_train/margins": -0.19501376152038574, + "rewards_train/rejected": -2.5135018825531006, + "step": 2377 + }, + { + "epoch": 0.66, + "logps_train/chosen": -35.115478515625, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -43.482425689697266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8365478515625, + "rewards_train/margins": 0.6491947174072266, + "rewards_train/rejected": -2.4857425689697266, + "step": 2377 + }, + { + "epoch": 0.66, + "learning_rate": 1.2993299475437592e-08, + "loss": 0.4635, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -140.00677490234375, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -163.34915161132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.400677442550659, + "rewards_train/margins": 3.834237813949585, + "rewards_train/rejected": -6.234915256500244, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -19.006988525390625, + "logps_train/ref_chosen": -3.296875, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -40.61743927001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5710114240646362, + "rewards_train/margins": 2.0532325506210327, + "rewards_train/rejected": -3.624243974685669, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -145.36695861816406, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -116.16311645507812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.386695861816406, + "rewards_train/margins": -3.2703840732574463, + "rewards_train/rejected": -2.11631178855896, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.53628158569336, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -8.899225234985352, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.834878146648407, + "rewards_train/margins": -0.6730806231498718, + "rewards_train/rejected": -0.16179752349853516, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -77.57461547851562, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -56.131935119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.29253846406936646, + "rewards_train/margins": 3.8182321190834045, + "rewards_train/rejected": -3.525693655014038, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -0.6127430200576782, + "logps_train/ref_chosen": -1.0078125, + "logps_train/ref_rejected": -6.59375, + "logps_train/rejected": -43.33514404296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0395069494843483, + "rewards_train/margins": 3.713646449148655, + "rewards_train/rejected": -3.6741394996643066, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -49.782344818115234, + "logps_train/ref_chosen": -29.25, + "logps_train/ref_rejected": -28.625, + "logps_train/rejected": -72.6405029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.053234577178955, + "rewards_train/margins": 2.348315715789795, + "rewards_train/rejected": -4.40155029296875, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -8.48460578918457, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -42.57438278198242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.326585590839386, + "rewards_train/margins": 1.905852735042572, + "rewards_train/rejected": -2.232438325881958, + "step": 2378 + }, + { + "epoch": 0.66, + "logps_train/chosen": -101.7302474975586, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -130.11557006835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.473024845123291, + "rewards_train/margins": 0.038532257080078125, + "rewards_train/rejected": -2.511557102203369, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -51.24736785888672, + "logps_train/ref_chosen": -28.625, + "logps_train/ref_rejected": -10.9375, + "logps_train/rejected": -46.02646255493164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2622368335723877, + "rewards_train/margins": 1.246659517288208, + "rewards_train/rejected": -3.5088963508605957, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -18.102725982666016, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -26.375, + "logps_train/rejected": -56.166107177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8540226221084595, + "rewards_train/margins": 2.125088095664978, + "rewards_train/rejected": -2.9791107177734375, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -77.322021484375, + "logps_train/ref_chosen": -49.0, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -105.94976043701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.832202196121216, + "rewards_train/margins": -1.337226152420044, + "rewards_train/rejected": -1.4949760437011719, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -16.48944091796875, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -54.816253662109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9708191156387329, + "rewards_train/margins": 2.873306393623352, + "rewards_train/rejected": -3.844125509262085, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -323.65435791015625, + "logps_train/ref_chosen": -174.0, + "logps_train/ref_rejected": -177.0, + "logps_train/rejected": -329.2249450683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -14.965435981750488, + "rewards_train/margins": 0.25705909729003906, + "rewards_train/rejected": -15.222495079040527, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -106.41615295410156, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -163.30978393554688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1916152983903885, + "rewards_train/margins": 2.639363095164299, + "rewards_train/rejected": -2.8309783935546875, + "step": 2379 + }, + { + "epoch": 0.66, + "logps_train/chosen": -177.42654418945312, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -210.0, + "logps_train/rejected": -247.8322296142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3426544666290283, + "rewards_train/margins": 1.440568447113037, + "rewards_train/rejected": -3.7832229137420654, + "step": 2379 + }, + { + "epoch": 0.67, + "learning_rate": 1.2571669104494253e-08, + "loss": 0.5205, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -181.2207794189453, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -233.53285217285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.822077989578247, + "rewards_train/margins": 7.331207036972046, + "rewards_train/rejected": -10.153285026550293, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -209.91136169433594, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -224.41799926757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.591136455535889, + "rewards_train/margins": 2.2506632804870605, + "rewards_train/rejected": -9.84179973602295, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -118.873291015625, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -106.79721069335938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6873291730880737, + "rewards_train/margins": 1.6923919916152954, + "rewards_train/rejected": -3.379721164703369, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -22.521425247192383, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -30.287050247192383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8271425366401672, + "rewards_train/margins": 1.7484375834465027, + "rewards_train/rejected": -2.57558012008667, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -13.906261444091797, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -43.10962677001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20937614142894745, + "rewards_train/margins": 2.0515865832567215, + "rewards_train/rejected": -2.260962724685669, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -47.326412200927734, + "logps_train/ref_chosen": -50.5, + "logps_train/ref_rejected": -39.5, + "logps_train/rejected": -54.24425506591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3173587918281555, + "rewards_train/margins": 1.7917843461036682, + "rewards_train/rejected": -1.4744255542755127, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -165.5684814453125, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -166.0, + "logps_train/rejected": -267.91461181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.056848049163818, + "rewards_train/margins": 4.134613513946533, + "rewards_train/rejected": -10.191461563110352, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -169.35537719726562, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -124.63079833984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.335537910461426, + "rewards_train/margins": -3.4224579334259033, + "rewards_train/rejected": -2.9130799770355225, + "step": 2380 + }, + { + "epoch": 0.67, + "logps_train/chosen": -10.716781616210938, + "logps_train/ref_chosen": -3.859375, + "logps_train/ref_rejected": -3.59375, + "logps_train/rejected": -11.237773895263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6857406497001648, + "rewards_train/margins": 0.07866173982620239, + "rewards_train/rejected": -0.7644023895263672, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -121.27886199951172, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -114.37705993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3278863430023193, + "rewards_train/margins": 2.884819746017456, + "rewards_train/rejected": -5.212706089019775, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -137.56451416015625, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -144.29771423339844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8064515590667725, + "rewards_train/margins": 1.6733200550079346, + "rewards_train/rejected": -4.479771614074707, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -53.45851135253906, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -38.5, + "logps_train/rejected": -61.57322692871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3708511590957642, + "rewards_train/margins": 0.9364715814590454, + "rewards_train/rejected": -2.3073227405548096, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.718571662902832, + "logps_train/ref_chosen": -6.1875, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -41.45558166503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05310716852545738, + "rewards_train/margins": 2.3299511410295963, + "rewards_train/rejected": -2.3830583095550537, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -47.54172134399414, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -47.88597869873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5666720867156982, + "rewards_train/margins": 0.03442573547363281, + "rewards_train/rejected": -2.601097822189331, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -3.208644390106201, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -12.715669631958008, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09430193901062012, + "rewards_train/margins": 1.0225775241851807, + "rewards_train/rejected": -1.1168794631958008, + "step": 2381 + }, + { + "epoch": 0.67, + "logps_train/chosen": -185.68643188476562, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -206.0360107421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.4686431884765625, + "rewards_train/margins": 0.0349578857421875, + "rewards_train/rejected": -4.50360107421875, + "step": 2381 + }, + { + "epoch": 0.67, + "learning_rate": 1.2156949684955398e-08, + "loss": 0.4462, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -160.96298217773438, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -201.4772186279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5962982177734375, + "rewards_train/margins": 2.5514235496520996, + "rewards_train/rejected": -5.147721767425537, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -238.43838500976562, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -225.45767211914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.593838691711426, + "rewards_train/margins": 0.45192909240722656, + "rewards_train/rejected": -12.045767784118652, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -220.1004180908203, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -194.0, + "logps_train/rejected": -304.24932861328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.710041999816895, + "rewards_train/margins": 2.3148908615112305, + "rewards_train/rejected": -11.024932861328125, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -29.99028968811035, + "logps_train/ref_chosen": -21.25, + "logps_train/ref_rejected": -29.25, + "logps_train/rejected": -61.04381561279297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8740289807319641, + "rewards_train/margins": 2.3053526282310486, + "rewards_train/rejected": -3.1793816089630127, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -204.96578979492188, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -234.83319091796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.6965789794921875, + "rewards_train/margins": -0.7132596969604492, + "rewards_train/rejected": -6.983319282531738, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -156.9661865234375, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -275.7686767578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.696618556976318, + "rewards_train/margins": 7.780249118804932, + "rewards_train/rejected": -13.47686767578125, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -133.71812438964844, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -140.40216064453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7218124270439148, + "rewards_train/margins": -0.08159637451171875, + "rewards_train/rejected": -0.640216052532196, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -28.65728759765625, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -7.03125, + "logps_train/rejected": -29.060333251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1626038551330566, + "rewards_train/margins": 0.04030442237854004, + "rewards_train/rejected": -2.2029082775115967, + "step": 2382 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.001341819763184, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -10.5625, + "logps_train/rejected": -13.47216510772705, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03763418272137642, + "rewards_train/margins": 0.25333232805132866, + "rewards_train/rejected": -0.2909665107727051, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -76.61681365966797, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -111.4005355834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.01168136578053236, + "rewards_train/margins": 2.6783723356202245, + "rewards_train/rejected": -2.690053701400757, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -30.911476135253906, + "logps_train/ref_chosen": -7.15625, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -41.837738037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3755226135253906, + "rewards_train/margins": 1.120751142501831, + "rewards_train/rejected": -3.4962737560272217, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.331090450286865, + "logps_train/ref_chosen": -1.7109375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -34.79657745361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.462015300989151, + "rewards_train/margins": 1.017642468214035, + "rewards_train/rejected": -1.479657769203186, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -29.74750518798828, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -34.5, + "logps_train/rejected": -68.2222900390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3372505903244019, + "rewards_train/margins": 2.03497850894928, + "rewards_train/rejected": -3.3722290992736816, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -172.06753540039062, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -241.62948608398438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.00675368309021, + "rewards_train/margins": 4.756194829940796, + "rewards_train/rejected": -7.762948513031006, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -117.78221130371094, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -202.27255249023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5282211303710938, + "rewards_train/margins": 4.299034118652344, + "rewards_train/rejected": -5.8272552490234375, + "step": 2383 + }, + { + "epoch": 0.67, + "logps_train/chosen": -189.24403381347656, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -119.5, + "logps_train/rejected": -187.28866577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.0244035720825195, + "rewards_train/margins": 0.7544631958007812, + "rewards_train/rejected": -6.778866767883301, + "step": 2383 + }, + { + "epoch": 0.67, + "learning_rate": 1.1749144119417077e-08, + "loss": 0.3149, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -44.63491439819336, + "logps_train/ref_chosen": -37.0, + "logps_train/ref_rejected": -5.5625, + "logps_train/rejected": -57.183441162109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7634914517402649, + "rewards_train/margins": 4.398602664470673, + "rewards_train/rejected": -5.1620941162109375, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -41.71446228027344, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -21.125, + "logps_train/rejected": -68.81935119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9214462041854858, + "rewards_train/margins": 2.8479892015457153, + "rewards_train/rejected": -4.769435405731201, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -73.88961791992188, + "logps_train/ref_chosen": -21.625, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -72.54995727539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.226461887359619, + "rewards_train/margins": 0.8910341262817383, + "rewards_train/rejected": -6.117496013641357, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -38.25895309448242, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -51.422454833984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.994645357131958, + "rewards_train/margins": 0.07260012626647949, + "rewards_train/rejected": -3.0672454833984375, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -12.113585472106934, + "logps_train/ref_chosen": -3.953125, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -39.75367736816406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8160460591316223, + "rewards_train/margins": 0.6343217492103577, + "rewards_train/rejected": -1.45036780834198, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -33.56827163696289, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -41.59577560424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9068271517753601, + "rewards_train/margins": 2.6621254086494446, + "rewards_train/rejected": -3.5689525604248047, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -16.957765579223633, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -9.6875, + "logps_train/rejected": -32.64037322998047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6332765817642212, + "rewards_train/margins": 1.6620107889175415, + "rewards_train/rejected": -2.2952873706817627, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -189.59788513183594, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -198.99917602539062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.459788799285889, + "rewards_train/margins": 3.5401291847229004, + "rewards_train/rejected": -8.999917984008789, + "step": 2384 + }, + { + "epoch": 0.67, + "logps_train/chosen": -24.150531768798828, + "logps_train/ref_chosen": -10.375, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -57.21239471435547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3775532245635986, + "rewards_train/margins": 1.893686294555664, + "rewards_train/rejected": -3.2712395191192627, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -147.06906127929688, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -188.02883911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6069061756134033, + "rewards_train/margins": 3.595978021621704, + "rewards_train/rejected": -7.202884197235107, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -17.804718017578125, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -42.47303009033203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8117218017578125, + "rewards_train/margins": 1.879331350326538, + "rewards_train/rejected": -2.6910531520843506, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -269.9830017089844, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -156.0, + "logps_train/rejected": -271.2793884277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.598299980163574, + "rewards_train/margins": 0.9296388626098633, + "rewards_train/rejected": -11.527938842773438, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -35.35445022583008, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -18.625, + "logps_train/rejected": -57.21868133544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5854451656341553, + "rewards_train/margins": 1.2739229202270508, + "rewards_train/rejected": -3.859368085861206, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -13.655071258544922, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -15.6875, + "logps_train/rejected": -54.51316833496094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7873821258544922, + "rewards_train/margins": 3.095184803009033, + "rewards_train/rejected": -3.8825669288635254, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -21.7440185546875, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -1.734375, + "logps_train/rejected": -17.287900924682617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4869018495082855, + "rewards_train/margins": 1.0684507191181183, + "rewards_train/rejected": -1.5553525686264038, + "step": 2385 + }, + { + "epoch": 0.67, + "logps_train/chosen": -109.77256774902344, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -22.92742347717285, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8272567987442017, + "rewards_train/margins": -0.9720144271850586, + "rewards_train/rejected": -0.8552423715591431, + "step": 2385 + }, + { + "epoch": 0.67, + "learning_rate": 1.1348255262086048e-08, + "loss": 0.2684, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -107.82032012939453, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -168.5133056640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.532032012939453, + "rewards_train/margins": 4.719298839569092, + "rewards_train/rejected": -7.251330852508545, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -114.31256866455078, + "logps_train/ref_chosen": -85.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -170.20782470703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9312570095062256, + "rewards_train/margins": 5.739526033401489, + "rewards_train/rejected": -8.670783042907715, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -157.03277587890625, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -184.64413452148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.253277778625488, + "rewards_train/margins": 0.9611358642578125, + "rewards_train/rejected": -6.214413642883301, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -29.84832763671875, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -9.375, + "logps_train/rejected": -39.212158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.266082763671875, + "rewards_train/margins": 0.7176330089569092, + "rewards_train/rejected": -2.983715772628784, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -300.9842834472656, + "logps_train/ref_chosen": -214.0, + "logps_train/ref_rejected": -202.0, + "logps_train/rejected": -288.58984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.6984281539917, + "rewards_train/margins": -0.0394439697265625, + "rewards_train/rejected": -8.658984184265137, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -93.73408508300781, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -105.66731262207031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4234085083007812, + "rewards_train/margins": 0.09332275390625, + "rewards_train/rejected": -1.5167312622070312, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -18.563396453857422, + "logps_train/ref_chosen": -4.5, + "logps_train/ref_rejected": -5.0625, + "logps_train/rejected": -30.405925750732422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4063396453857422, + "rewards_train/margins": 1.1280028820037842, + "rewards_train/rejected": -2.5343425273895264, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -117.21113586425781, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -80.0, + "logps_train/rejected": -125.05931854248047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.221113681793213, + "rewards_train/margins": 0.284818172454834, + "rewards_train/rejected": -4.505931854248047, + "step": 2386 + }, + { + "epoch": 0.67, + "logps_train/chosen": -1.3465148210525513, + "logps_train/ref_chosen": -1.1171875, + "logps_train/ref_rejected": -1.1171875, + "logps_train/rejected": -1.2887027263641357, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.022932732477784157, + "rewards_train/margins": -0.005781209096312523, + "rewards_train/rejected": -0.017151523381471634, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -61.7874870300293, + "logps_train/ref_chosen": -23.75, + "logps_train/ref_rejected": -43.25, + "logps_train/rejected": -65.49454498291016, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.803748846054077, + "rewards_train/margins": -1.579294204711914, + "rewards_train/rejected": -2.224454641342163, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -24.10103988647461, + "logps_train/ref_chosen": -14.0625, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -70.27578735351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0038540363311768, + "rewards_train/margins": 3.9612247943878174, + "rewards_train/rejected": -4.965078830718994, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -43.162200927734375, + "logps_train/ref_chosen": -10.5625, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -53.756858825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.259970188140869, + "rewards_train/margins": 1.0344657897949219, + "rewards_train/rejected": -4.294435977935791, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -26.436599731445312, + "logps_train/ref_chosen": -25.375, + "logps_train/ref_rejected": -2.875, + "logps_train/rejected": -17.366052627563477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10615997761487961, + "rewards_train/margins": 1.342945285141468, + "rewards_train/rejected": -1.4491052627563477, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -47.896240234375, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -11.1875, + "logps_train/rejected": -50.57475280761719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4646241664886475, + "rewards_train/margins": 1.4741010665893555, + "rewards_train/rejected": -3.938725233078003, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -164.74484252929688, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -129.53497314453125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.474484443664551, + "rewards_train/margins": -0.9209871292114258, + "rewards_train/rejected": -4.553497314453125, + "step": 2387 + }, + { + "epoch": 0.67, + "logps_train/chosen": -129.07833862304688, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -29.5, + "logps_train/rejected": -38.242835998535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.0578339099884033, + "rewards_train/margins": -1.1835502982139587, + "rewards_train/rejected": -0.8742836117744446, + "step": 2387 + }, + { + "epoch": 0.67, + "learning_rate": 1.095428591875902e-08, + "loss": 0.5541, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -70.26488494873047, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -155.046142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07648849487304688, + "rewards_train/margins": 6.6281256675720215, + "rewards_train/rejected": -6.704614162445068, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -52.57283401489258, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -230.95376586914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5072833895683289, + "rewards_train/margins": 13.388093769550323, + "rewards_train/rejected": -13.895377159118652, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -36.844482421875, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -48.374332427978516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3406982421875, + "rewards_train/margins": 1.2154850959777832, + "rewards_train/rejected": -3.556183338165283, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -119.96041107177734, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -193.94033813476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6960411071777344, + "rewards_train/margins": 1.7979927062988281, + "rewards_train/rejected": -4.4940338134765625, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -139.89918518066406, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -118.671630859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4399185180664062, + "rewards_train/margins": -0.5727553963661194, + "rewards_train/rejected": -0.8671631217002869, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -172.93145751953125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -219.39401245117188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.493145942687988, + "rewards_train/margins": 0.6462554931640625, + "rewards_train/rejected": -5.139401435852051, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -171.89881896972656, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -234.31524658203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.739881992340088, + "rewards_train/margins": 4.791642665863037, + "rewards_train/rejected": -9.531524658203125, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -27.507287979125977, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -36.84529113769531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1851038932800293, + "rewards_train/margins": 1.0681753158569336, + "rewards_train/rejected": -3.253279209136963, + "step": 2388 + }, + { + "epoch": 0.67, + "logps_train/chosen": -50.958126068115234, + "logps_train/ref_chosen": -33.25, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -46.08305740356445, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7708126306533813, + "rewards_train/margins": 1.1249932050704956, + "rewards_train/rejected": -2.895805835723877, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -162.7576141357422, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -122.10646057128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8757615089416504, + "rewards_train/margins": -1.1151154041290283, + "rewards_train/rejected": -1.760646104812622, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -149.02210998535156, + "logps_train/ref_chosen": -106.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -139.17529296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.302211284637451, + "rewards_train/margins": 0.36531829833984375, + "rewards_train/rejected": -4.667529582977295, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -181.11578369140625, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -158.0, + "logps_train/rejected": -224.66482543945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.711578369140625, + "rewards_train/margins": 2.954904079437256, + "rewards_train/rejected": -6.666482448577881, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -76.12016296386719, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -112.94464111328125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.1879837065935135, + "rewards_train/margins": -0.06755219399929047, + "rewards_train/rejected": 0.25553590059280396, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -144.9281005859375, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -170.83641052246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5928101539611816, + "rewards_train/margins": 2.0908308029174805, + "rewards_train/rejected": -4.683640956878662, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -39.269256591796875, + "logps_train/ref_chosen": -28.875, + "logps_train/ref_rejected": -9.4375, + "logps_train/rejected": -26.638151168823242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0394257307052612, + "rewards_train/margins": 0.680639386177063, + "rewards_train/rejected": -1.7200651168823242, + "step": 2389 + }, + { + "epoch": 0.67, + "logps_train/chosen": -20.50156021118164, + "logps_train/ref_chosen": -5.34375, + "logps_train/ref_rejected": -3.03125, + "logps_train/rejected": -6.807353496551514, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.515781044960022, + "rewards_train/margins": -1.1381706893444061, + "rewards_train/rejected": -0.37761035561561584, + "step": 2389 + }, + { + "epoch": 0.67, + "learning_rate": 1.0567238846803995e-08, + "loss": 0.4428, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -29.612693786621094, + "logps_train/ref_chosen": -0.58984375, + "logps_train/ref_rejected": -0.58984375, + "logps_train/rejected": -30.47452163696289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.902285099029541, + "rewards_train/margins": 0.08618283271789551, + "rewards_train/rejected": -2.9884679317474365, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -36.74226760864258, + "logps_train/ref_chosen": -10.0625, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -46.57670974731445, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6679768562316895, + "rewards_train/margins": -0.46030592918395996, + "rewards_train/rejected": -2.2076709270477295, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -64.44129180908203, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -125.12295532226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21912918984889984, + "rewards_train/margins": 2.293166294693947, + "rewards_train/rejected": -2.5122954845428467, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -145.6449432373047, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -167.0, + "logps_train/rejected": -301.271240234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.914494514465332, + "rewards_train/margins": 8.512629508972168, + "rewards_train/rejected": -13.4271240234375, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -25.728271484375, + "logps_train/ref_chosen": -18.25, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -61.56770706176758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7478271722793579, + "rewards_train/margins": 2.6339436769485474, + "rewards_train/rejected": -3.3817708492279053, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -135.03005981445312, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -85.0, + "logps_train/rejected": -135.95571899414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.303006172180176, + "rewards_train/margins": 0.7925658226013184, + "rewards_train/rejected": -5.095571994781494, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -16.836824417114258, + "logps_train/ref_chosen": -6.34375, + "logps_train/ref_rejected": -4.5625, + "logps_train/rejected": -22.79088592529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0493074655532837, + "rewards_train/margins": 0.7735311985015869, + "rewards_train/rejected": -1.8228386640548706, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -7.650228023529053, + "logps_train/ref_chosen": -2.140625, + "logps_train/ref_rejected": -2.625, + "logps_train/rejected": -16.226076126098633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5509603023529053, + "rewards_train/margins": 0.8091473579406738, + "rewards_train/rejected": -1.360107660293579, + "step": 2390 + }, + { + "epoch": 0.67, + "logps_train/chosen": -14.56261920928955, + "logps_train/ref_chosen": -2.265625, + "logps_train/ref_rejected": -11.3125, + "logps_train/rejected": -37.528018951416016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2296994924545288, + "rewards_train/margins": 1.3918524980545044, + "rewards_train/rejected": -2.621551990509033, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -17.767826080322266, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -6.3125, + "logps_train/rejected": -25.775100708007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2767826318740845, + "rewards_train/margins": 0.6694774627685547, + "rewards_train/rejected": -1.9462600946426392, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -5.425139427185059, + "logps_train/ref_chosen": -3.0, + "logps_train/ref_rejected": -4.625, + "logps_train/rejected": -6.42724084854126, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24251393973827362, + "rewards_train/margins": -0.062289848923683167, + "rewards_train/rejected": -0.18022409081459045, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -47.218868255615234, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -110.157958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9218868017196655, + "rewards_train/margins": 1.5439091920852661, + "rewards_train/rejected": -3.4657959938049316, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -24.34992027282715, + "logps_train/ref_chosen": -8.625, + "logps_train/ref_rejected": -15.1875, + "logps_train/rejected": -23.833820343017578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.572492003440857, + "rewards_train/margins": -0.7078599333763123, + "rewards_train/rejected": -0.8646320700645447, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -37.512962341308594, + "logps_train/ref_chosen": -27.5, + "logps_train/ref_rejected": -12.0625, + "logps_train/rejected": -35.96367645263672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0012962818145752, + "rewards_train/margins": 1.3888213634490967, + "rewards_train/rejected": -2.390117645263672, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -134.4080352783203, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -173.90020751953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7908036708831787, + "rewards_train/margins": 2.2992169857025146, + "rewards_train/rejected": -5.090020656585693, + "step": 2391 + }, + { + "epoch": 0.67, + "logps_train/chosen": -192.6602020263672, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -83.16844177246094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.666020154953003, + "rewards_train/margins": -1.8491759300231934, + "rewards_train/rejected": -1.8168442249298096, + "step": 2391 + }, + { + "epoch": 0.67, + "learning_rate": 1.0187116755139947e-08, + "loss": 0.4915, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -93.839599609375, + "logps_train/ref_chosen": -69.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -238.7451934814453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.483959913253784, + "rewards_train/margins": 7.290560007095337, + "rewards_train/rejected": -9.774519920349121, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.298407077789307, + "logps_train/ref_chosen": -1.9453125, + "logps_train/ref_rejected": -1.5703125, + "logps_train/rejected": -24.11349105834961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4353094696998596, + "rewards_train/margins": 1.8190085291862488, + "rewards_train/rejected": -2.2543179988861084, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -150.1337890625, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -133.66114807128906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.813378810882568, + "rewards_train/margins": -3.097263813018799, + "rewards_train/rejected": -4.7161149978637695, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -126.95275115966797, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -141.89788818359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3452751636505127, + "rewards_train/margins": 1.8945138454437256, + "rewards_train/rejected": -4.239789009094238, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -106.78935241699219, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -131.30746459960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6789352893829346, + "rewards_train/margins": 2.351811170578003, + "rewards_train/rejected": -5.0307464599609375, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -15.328393936157227, + "logps_train/ref_chosen": -10.5, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -43.32294464111328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4828394055366516, + "rewards_train/margins": 3.2088300585746765, + "rewards_train/rejected": -3.691669464111328, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -102.96070861816406, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -151.0, + "logps_train/rejected": -202.59580993652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2460708618164062, + "rewards_train/margins": 2.913510322570801, + "rewards_train/rejected": -5.159581184387207, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -23.813600540161133, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -10.625, + "logps_train/rejected": -43.81663131713867, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4438600540161133, + "rewards_train/margins": 1.875303030014038, + "rewards_train/rejected": -3.3191630840301514, + "step": 2392 + }, + { + "epoch": 0.67, + "logps_train/chosen": -86.3531494140625, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -144.17031860351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.13531494140625, + "rewards_train/margins": 4.231717109680176, + "rewards_train/rejected": -6.367032051086426, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -143.4312286376953, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -76.5, + "logps_train/rejected": -103.28712463378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2431228160858154, + "rewards_train/margins": 0.4355897903442383, + "rewards_train/rejected": -2.6787126064300537, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -146.1922607421875, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -167.0401153564453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2192261219024658, + "rewards_train/margins": 3.034785509109497, + "rewards_train/rejected": -4.254011631011963, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -129.49359130859375, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -179.58184814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.049359321594238, + "rewards_train/margins": 2.10882568359375, + "rewards_train/rejected": -6.158185005187988, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -190.05416870117188, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -119.0, + "logps_train/rejected": -207.0763702392578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.005416870117188, + "rewards_train/margins": -0.19777965545654297, + "rewards_train/rejected": -8.807637214660645, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -110.40450286865234, + "logps_train/ref_chosen": -86.5, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -176.0395050048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3904502391815186, + "rewards_train/margins": 0.6135003566741943, + "rewards_train/rejected": -3.003950595855713, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -27.3386287689209, + "logps_train/ref_chosen": -14.4375, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -27.158885955810547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.290112853050232, + "rewards_train/margins": 0.8945258855819702, + "rewards_train/rejected": -2.184638738632202, + "step": 2393 + }, + { + "epoch": 0.67, + "logps_train/chosen": -123.03268432617188, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -107.61070251464844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.403268575668335, + "rewards_train/margins": 1.2578017711639404, + "rewards_train/rejected": -4.661070346832275, + "step": 2393 + }, + { + "epoch": 0.67, + "learning_rate": 9.813922304218626e-09, + "loss": 0.391, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -9.90931510925293, + "logps_train/ref_chosen": -3.234375, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -28.080730438232422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.667493999004364, + "rewards_train/margins": 1.2593290209770203, + "rewards_train/rejected": -1.9268230199813843, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -171.34852600097656, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -146.84927368164062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.334852695465088, + "rewards_train/margins": -0.9499251842498779, + "rewards_train/rejected": -2.38492751121521, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -153.08876037597656, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -151.56640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.3588759899139404, + "rewards_train/margins": -0.00223541259765625, + "rewards_train/rejected": -3.356640577316284, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -119.71916961669922, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -155.05618286132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3219170570373535, + "rewards_train/margins": 2.6337013244628906, + "rewards_train/rejected": -4.955618381500244, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -17.087575912475586, + "logps_train/ref_chosen": -3.28125, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -19.179346084594727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3806326389312744, + "rewards_train/margins": -0.8314480185508728, + "rewards_train/rejected": -0.5491846203804016, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -64.46206665039062, + "logps_train/ref_chosen": -40.25, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -52.76392364501953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4212067127227783, + "rewards_train/margins": 1.5739357471466064, + "rewards_train/rejected": -3.9951424598693848, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -67.66799926757812, + "logps_train/ref_chosen": -54.25, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -146.3357696533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3417999744415283, + "rewards_train/margins": 4.891777276992798, + "rewards_train/rejected": -6.233577251434326, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -66.14586639404297, + "logps_train/ref_chosen": -37.75, + "logps_train/ref_rejected": -69.0, + "logps_train/rejected": -148.59149169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8395867347717285, + "rewards_train/margins": 5.11956262588501, + "rewards_train/rejected": -7.959149360656738, + "step": 2394 + }, + { + "epoch": 0.67, + "logps_train/chosen": -39.85026931762695, + "logps_train/ref_chosen": -34.0, + "logps_train/ref_rejected": -52.5, + "logps_train/rejected": -79.87480163574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5850269198417664, + "rewards_train/margins": 2.1524532437324524, + "rewards_train/rejected": -2.7374801635742188, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -72.78497314453125, + "logps_train/ref_chosen": -56.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -176.39759826660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6284973621368408, + "rewards_train/margins": 5.66126275062561, + "rewards_train/rejected": -7.289760112762451, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -17.714618682861328, + "logps_train/ref_chosen": -6.21875, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -29.074960708618164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1495869159698486, + "rewards_train/margins": 0.4766591787338257, + "rewards_train/rejected": -1.6262460947036743, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -81.54939270019531, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -106.25288391113281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3549392819404602, + "rewards_train/margins": 1.2703490853309631, + "rewards_train/rejected": -1.6252883672714233, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -12.541858673095703, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -42.7640495300293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6760608553886414, + "rewards_train/margins": 2.419094145298004, + "rewards_train/rejected": -3.0951550006866455, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -35.078102111816406, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -44.592247009277344, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2687478065490723, + "rewards_train/margins": -0.2470231056213379, + "rewards_train/rejected": -3.0217247009277344, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -8.638338088989258, + "logps_train/ref_chosen": -1.109375, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -26.976837158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7528963088989258, + "rewards_train/margins": 0.057287395000457764, + "rewards_train/rejected": -0.8101837038993835, + "step": 2395 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.885016918182373, + "logps_train/ref_chosen": -2.28125, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -32.46697998046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46037670969963074, + "rewards_train/margins": 0.9988212883472443, + "rewards_train/rejected": -1.459197998046875, + "step": 2395 + }, + { + "epoch": 0.67, + "learning_rate": 9.44765810600523e-09, + "loss": 0.4009, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -72.2641372680664, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -81.0, + "logps_train/rejected": -115.6624526977539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9764137268066406, + "rewards_train/margins": 2.4898316860198975, + "rewards_train/rejected": -3.466245412826538, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -193.7348175048828, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -263.37005615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.773481845855713, + "rewards_train/margins": 3.1635241508483887, + "rewards_train/rejected": -8.937005996704102, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -113.94825744628906, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -214.53701782226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.89482581615448, + "rewards_train/margins": 4.358875870704651, + "rewards_train/rejected": -6.253701686859131, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -1.306447982788086, + "logps_train/ref_chosen": -0.36328125, + "logps_train/ref_rejected": -6.21875, + "logps_train/rejected": -6.596321105957031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.09431667625904083, + "rewards_train/margins": -0.05655956640839577, + "rewards_train/rejected": -0.037757109850645065, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -9.896808624267578, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -30.557918548583984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06468086689710617, + "rewards_train/margins": 1.541111059486866, + "rewards_train/rejected": -1.6057919263839722, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -3.915304660797119, + "logps_train/ref_chosen": -2.734375, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -7.6033525466918945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11809296905994415, + "rewards_train/margins": 0.24536730349063873, + "rewards_train/rejected": -0.3634602725505829, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -32.53116989135742, + "logps_train/ref_chosen": -21.5, + "logps_train/ref_rejected": -23.75, + "logps_train/rejected": -36.11676025390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1031169891357422, + "rewards_train/margins": 0.13355910778045654, + "rewards_train/rejected": -1.2366760969161987, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -18.567214965820312, + "logps_train/ref_chosen": -2.859375, + "logps_train/ref_rejected": -4.5, + "logps_train/rejected": -28.541112899780273, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5707839727401733, + "rewards_train/margins": 0.8333274126052856, + "rewards_train/rejected": -2.404111385345459, + "step": 2396 + }, + { + "epoch": 0.67, + "logps_train/chosen": -75.1274642944336, + "logps_train/ref_chosen": -42.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -98.05826568603516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.262746572494507, + "rewards_train/margins": -0.8569200038909912, + "rewards_train/rejected": -2.4058265686035156, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -119.99125671386719, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -3.71875, + "logps_train/rejected": -23.52802085876465, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2991256713867188, + "rewards_train/margins": -1.318198561668396, + "rewards_train/rejected": -1.9809271097183228, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -69.9803466796875, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -43.9035530090332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4480346739292145, + "rewards_train/margins": 1.2798206508159637, + "rewards_train/rejected": -1.7278553247451782, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -168.62640380859375, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -184.47393798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.862640380859375, + "rewards_train/margins": 0.4847536087036133, + "rewards_train/rejected": -4.347393989562988, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -42.07471466064453, + "logps_train/ref_chosen": -28.0, + "logps_train/ref_rejected": -4.40625, + "logps_train/rejected": -16.92694854736328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4074715375900269, + "rewards_train/margins": -0.15540170669555664, + "rewards_train/rejected": -1.2520698308944702, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -94.43878173828125, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -50.0, + "logps_train/rejected": -83.31259155273438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.593878149986267, + "rewards_train/margins": 1.737381100654602, + "rewards_train/rejected": -3.331259250640869, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -152.78115844726562, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -137.0, + "logps_train/rejected": -225.05665588378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6281158924102783, + "rewards_train/margins": 5.1775500774383545, + "rewards_train/rejected": -8.805665969848633, + "step": 2397 + }, + { + "epoch": 0.67, + "logps_train/chosen": -37.348453521728516, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -25.733627319335938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1723453998565674, + "rewards_train/margins": -0.9146075248718262, + "rewards_train/rejected": -2.257737874984741, + "step": 2397 + }, + { + "epoch": 0.67, + "learning_rate": 9.088326723961093e-09, + "loss": 0.5175, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -25.599937438964844, + "logps_train/ref_chosen": -6.84375, + "logps_train/ref_rejected": -10.75, + "logps_train/rejected": -34.70754623413086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.875618815422058, + "rewards_train/margins": 0.520135760307312, + "rewards_train/rejected": -2.39575457572937, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -269.4289855957031, + "logps_train/ref_chosen": -184.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -245.85614013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.542899131774902, + "rewards_train/margins": 1.2427148818969727, + "rewards_train/rejected": -9.785614013671875, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -136.73162841796875, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -111.46421813964844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.473162889480591, + "rewards_train/margins": -0.576741099357605, + "rewards_train/rejected": -1.8964217901229858, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -35.800621032714844, + "logps_train/ref_chosen": -9.25, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -35.849517822265625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.655062198638916, + "rewards_train/margins": -0.9701104164123535, + "rewards_train/rejected": -1.6849517822265625, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -105.16024780273438, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -166.61349487304688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1660248041152954, + "rewards_train/margins": 7.095324873924255, + "rewards_train/rejected": -8.26134967803955, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -256.4110107421875, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -279.12030029296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.141100883483887, + "rewards_train/margins": 1.2709293365478516, + "rewards_train/rejected": -11.412030220031738, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -24.159896850585938, + "logps_train/ref_chosen": -11.0, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -16.866535186767578, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3159897327423096, + "rewards_train/margins": -0.26996123790740967, + "rewards_train/rejected": -1.0460284948349, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -130.1486358642578, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -146.11215209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0148637294769287, + "rewards_train/margins": 0.4463515281677246, + "rewards_train/rejected": -3.4612152576446533, + "step": 2398 + }, + { + "epoch": 0.67, + "logps_train/chosen": -5.954643249511719, + "logps_train/ref_chosen": -3.65625, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -7.918859004974365, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22983932495117188, + "rewards_train/margins": 0.1557965874671936, + "rewards_train/rejected": -0.3856359124183655, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -18.266807556152344, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -12.5, + "logps_train/rejected": -28.583080291748047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6516807675361633, + "rewards_train/margins": 0.9566273093223572, + "rewards_train/rejected": -1.6083080768585205, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -13.480658531188965, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -24.478986740112305, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6855658888816833, + "rewards_train/margins": 1.2529577612876892, + "rewards_train/rejected": -1.9385236501693726, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -83.09831237792969, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -101.9598388671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9848312139511108, + "rewards_train/margins": 0.661152720451355, + "rewards_train/rejected": -2.645983934402466, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -164.32969665527344, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -201.06729125976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.682969808578491, + "rewards_train/margins": 5.723759889602661, + "rewards_train/rejected": -9.406729698181152, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -107.63921356201172, + "logps_train/ref_chosen": -65.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -146.20880126953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.26392126083374, + "rewards_train/margins": 1.6069588661193848, + "rewards_train/rejected": -5.870880126953125, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -5.3958740234375, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -5.3125, + "logps_train/rejected": -29.072744369506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35755616426467896, + "rewards_train/margins": 2.0184683203697205, + "rewards_train/rejected": -2.3760244846343994, + "step": 2399 + }, + { + "epoch": 0.67, + "logps_train/chosen": -214.69943237304688, + "logps_train/ref_chosen": -179.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -254.34521484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5699431896209717, + "rewards_train/margins": 8.264578104019165, + "rewards_train/rejected": -11.834521293640137, + "step": 2399 + }, + { + "epoch": 0.67, + "learning_rate": 8.735930673024805e-09, + "loss": 0.4085, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -238.6028594970703, + "logps_train/ref_chosen": -127.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -227.87646484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.160285949707031, + "rewards_train/margins": 0.7773609161376953, + "rewards_train/rejected": -11.937646865844727, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -16.787281036376953, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -16.5, + "logps_train/rejected": -47.90565490722656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9849781394004822, + "rewards_train/margins": 2.1555874943733215, + "rewards_train/rejected": -3.1405656337738037, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -4.031208038330078, + "logps_train/ref_chosen": -7.34375, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -14.605436325073242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3312542140483856, + "rewards_train/margins": 0.3417978463694453, + "rewards_train/rejected": -0.010543632321059704, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -131.65487670898438, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -154.60809326171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9654876589775085, + "rewards_train/margins": 1.295321762561798, + "rewards_train/rejected": -2.2608094215393066, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -2.1766581535339355, + "logps_train/ref_chosen": -1.5, + "logps_train/ref_rejected": -0.53125, + "logps_train/rejected": -0.27646636962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.06766581535339355, + "rewards_train/margins": -0.09314417839050293, + "rewards_train/rejected": 0.025478363037109375, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -122.5167236328125, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -159.62155151367188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.301672339439392, + "rewards_train/margins": 6.110482811927795, + "rewards_train/rejected": -7.4121551513671875, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -38.71118927001953, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -48.5, + "logps_train/rejected": -105.81168365478516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.421118974685669, + "rewards_train/margins": 4.310049295425415, + "rewards_train/rejected": -5.731168270111084, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -3.982755661010742, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -15.315862655639648, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1139005646109581, + "rewards_train/margins": 0.7489357367157936, + "rewards_train/rejected": -0.8628363013267517, + "step": 2400 + }, + { + "epoch": 0.67, + "logps_train/chosen": -143.3668212890625, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -207.7547149658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9366822242736816, + "rewards_train/margins": 3.838789463043213, + "rewards_train/rejected": -6.7754716873168945, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -45.77601623535156, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -20.75, + "logps_train/rejected": -63.49974060058594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.621351718902588, + "rewards_train/margins": 0.6536226272583008, + "rewards_train/rejected": -4.274974346160889, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -72.55552673339844, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -77.0, + "logps_train/rejected": -88.21768188476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4055526852607727, + "rewards_train/margins": 0.7162155508995056, + "rewards_train/rejected": -1.1217682361602783, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -165.96682739257812, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -165.0, + "logps_train/rejected": -232.30853271484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0966827869415283, + "rewards_train/margins": 4.634170770645142, + "rewards_train/rejected": -6.73085355758667, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -62.45349884033203, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -33.25, + "logps_train/rejected": -61.83708190917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3953499794006348, + "rewards_train/margins": 0.46335816383361816, + "rewards_train/rejected": -2.858708143234253, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -21.742881774902344, + "logps_train/ref_chosen": -4.96875, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -18.04876708984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6774132251739502, + "rewards_train/margins": -0.22878646850585938, + "rewards_train/rejected": -1.4486267566680908, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -92.66738891601562, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -223.05908203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5167388916015625, + "rewards_train/margins": 8.489169120788574, + "rewards_train/rejected": -9.005908012390137, + "step": 2401 + }, + { + "epoch": 0.67, + "logps_train/chosen": -134.6192626953125, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -201.62905883789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.711926221847534, + "rewards_train/margins": 5.950979471206665, + "rewards_train/rejected": -8.6629056930542, + "step": 2401 + }, + { + "epoch": 0.67, + "learning_rate": 8.390472419595119e-09, + "loss": 0.2852, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -23.35835838317871, + "logps_train/ref_chosen": -8.8125, + "logps_train/ref_rejected": -24.125, + "logps_train/rejected": -61.61561584472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4545859098434448, + "rewards_train/margins": 2.2944756746292114, + "rewards_train/rejected": -3.7490615844726562, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -28.208011627197266, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -44.61052322387695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5645512342453003, + "rewards_train/margins": 1.4715012311935425, + "rewards_train/rejected": -3.0360524654388428, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -2.677474021911621, + "logps_train/ref_chosen": -2.921875, + "logps_train/ref_rejected": -1.3984375, + "logps_train/rejected": -1.850760817527771, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.02444009855389595, + "rewards_train/margins": 0.06967243179678917, + "rewards_train/rejected": -0.04523233324289322, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -1.97037672996521, + "logps_train/ref_chosen": -2.1875, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -5.891863822937012, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.021712327376008034, + "rewards_train/margins": -0.1578512992709875, + "rewards_train/rejected": 0.17956362664699554, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -113.86007690429688, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -171.87051391601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0360076427459717, + "rewards_train/margins": 7.351043939590454, + "rewards_train/rejected": -9.387051582336426, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -208.76333618164062, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -230.67385864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.676333904266357, + "rewards_train/margins": 3.891051769256592, + "rewards_train/rejected": -9.56738567352295, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -38.849788665771484, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -18.047048568725586, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1724789142608643, + "rewards_train/margins": -0.045899033546447754, + "rewards_train/rejected": -1.1265798807144165, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -111.06455993652344, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -152.76840209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9064559936523438, + "rewards_train/margins": 2.8703842163085938, + "rewards_train/rejected": -3.7768402099609375, + "step": 2402 + }, + { + "epoch": 0.67, + "logps_train/chosen": -10.901464462280273, + "logps_train/ref_chosen": -7.0, + "logps_train/ref_rejected": -30.375, + "logps_train/rejected": -55.49740219116211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3901464641094208, + "rewards_train/margins": 2.1220937073230743, + "rewards_train/rejected": -2.512240171432495, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -62.65410232543945, + "logps_train/ref_chosen": -32.25, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -45.62615203857422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.040410280227661, + "rewards_train/margins": 0.9347050189971924, + "rewards_train/rejected": -3.9751152992248535, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -18.95864486694336, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -18.757949829101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2583644986152649, + "rewards_train/margins": -0.020069509744644165, + "rewards_train/rejected": -0.23829498887062073, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -42.511817932128906, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -32.75, + "logps_train/rejected": -61.938602447509766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.288681745529175, + "rewards_train/margins": 0.6301784515380859, + "rewards_train/rejected": -2.9188601970672607, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -135.8529510498047, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -129.6519012451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6352951526641846, + "rewards_train/margins": 1.1798951625823975, + "rewards_train/rejected": -4.815190315246582, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -35.06266784667969, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -1.1171875, + "logps_train/rejected": -24.08060073852539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6437667608261108, + "rewards_train/margins": 0.6525746583938599, + "rewards_train/rejected": -2.2963414192199707, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -15.479700088500977, + "logps_train/ref_chosen": -4.03125, + "logps_train/ref_rejected": -9.875, + "logps_train/rejected": -40.671287536621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1448450088500977, + "rewards_train/margins": 1.934783697128296, + "rewards_train/rejected": -3.0796287059783936, + "step": 2403 + }, + { + "epoch": 0.67, + "logps_train/chosen": -79.3382568359375, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -99.97428894042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.583825707435608, + "rewards_train/margins": 0.5636032819747925, + "rewards_train/rejected": -2.1474289894104004, + "step": 2403 + }, + { + "epoch": 0.67, + "learning_rate": 8.051954381513514e-09, + "loss": 0.3359, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -22.742977142333984, + "logps_train/ref_chosen": -4.90625, + "logps_train/ref_rejected": -16.125, + "logps_train/rejected": -29.262521743774414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7836726903915405, + "rewards_train/margins": -0.4699205160140991, + "rewards_train/rejected": -1.3137521743774414, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -17.832836151123047, + "logps_train/ref_chosen": -3.5, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -28.80545425415039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4332836866378784, + "rewards_train/margins": -0.17773818969726562, + "rewards_train/rejected": -1.2555454969406128, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -95.33308410644531, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -102.0, + "logps_train/rejected": -141.29067993164062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.833308458328247, + "rewards_train/margins": 2.095759630203247, + "rewards_train/rejected": -3.929068088531494, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -86.25199890136719, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -128.13064575195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3751998841762543, + "rewards_train/margins": 0.4378646910190582, + "rewards_train/rejected": -0.8130645751953125, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -124.18760681152344, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -192.371826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2687606811523438, + "rewards_train/margins": 7.5184221267700195, + "rewards_train/rejected": -10.787182807922363, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -46.81999206542969, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -42.97452163696289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.731999158859253, + "rewards_train/margins": -0.3470468521118164, + "rewards_train/rejected": -3.3849523067474365, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -55.68989944458008, + "logps_train/ref_chosen": -32.0, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -111.59074401855469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.368989944458008, + "rewards_train/margins": 2.4650845527648926, + "rewards_train/rejected": -4.8340744972229, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -87.90036010742188, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -59.60051345825195, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.765036106109619, + "rewards_train/margins": -2.179984748363495, + "rewards_train/rejected": -0.5850513577461243, + "step": 2404 + }, + { + "epoch": 0.67, + "logps_train/chosen": -192.40420532226562, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -260.94683837890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.240420818328857, + "rewards_train/margins": 7.254263401031494, + "rewards_train/rejected": -11.494684219360352, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -3.2602992057800293, + "logps_train/ref_chosen": -1.1796875, + "logps_train/ref_rejected": -1.6015625, + "logps_train/rejected": -3.6952970027923584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.20806117355823517, + "rewards_train/margins": 0.0013122856616973877, + "rewards_train/rejected": -0.20937345921993256, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -29.076326370239258, + "logps_train/ref_chosen": -8.9375, + "logps_train/ref_rejected": -7.28125, + "logps_train/rejected": -44.70040512084961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.013882637023926, + "rewards_train/margins": 1.7280328273773193, + "rewards_train/rejected": -3.741915464401245, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -11.34318733215332, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -2.21875, + "logps_train/rejected": -34.21810531616211, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.27806875109672546, + "rewards_train/margins": 2.921866923570633, + "rewards_train/rejected": -3.1999356746673584, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -48.161216735839844, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -38.782379150390625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1411216259002686, + "rewards_train/margins": -0.7191336154937744, + "rewards_train/rejected": -2.421988010406494, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -16.956832885742188, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -21.114295959472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8394333124160767, + "rewards_train/margins": 0.45949625968933105, + "rewards_train/rejected": -1.2989295721054077, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -84.73169708251953, + "logps_train/ref_chosen": -72.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -164.64285278320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2231696844100952, + "rewards_train/margins": 3.991115689277649, + "rewards_train/rejected": -5.214285373687744, + "step": 2405 + }, + { + "epoch": 0.67, + "logps_train/chosen": -35.93278503417969, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -31.75, + "logps_train/rejected": -35.051029205322266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6307784914970398, + "rewards_train/margins": -0.30067557096481323, + "rewards_train/rejected": -0.33010292053222656, + "step": 2405 + }, + { + "epoch": 0.67, + "learning_rate": 7.720378928047333e-09, + "loss": 0.5622, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -43.26502227783203, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -200.85269165039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.001502275466919, + "rewards_train/margins": 7.6337668895721436, + "rewards_train/rejected": -9.635269165039062, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -22.60150909423828, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -5.0, + "logps_train/rejected": -32.858951568603516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7164009213447571, + "rewards_train/margins": 2.0694941878318787, + "rewards_train/rejected": -2.7858951091766357, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -27.06745719909668, + "logps_train/ref_chosen": -10.0, + "logps_train/ref_rejected": -6.40625, + "logps_train/rejected": -32.16448211669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7067457437515259, + "rewards_train/margins": 0.8690775632858276, + "rewards_train/rejected": -2.5758233070373535, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -88.42181396484375, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -17.625, + "logps_train/rejected": -22.474952697753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.392181396484375, + "rewards_train/margins": 0.0928138792514801, + "rewards_train/rejected": -0.4849952757358551, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -5.477113246917725, + "logps_train/ref_chosen": -1.1015625, + "logps_train/ref_rejected": -2.953125, + "logps_train/rejected": -20.039779663085938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43755507469177246, + "rewards_train/margins": 1.2711104154586792, + "rewards_train/rejected": -1.7086654901504517, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -69.1559066772461, + "logps_train/ref_chosen": -54.75, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -88.13762664794922, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.440590739250183, + "rewards_train/margins": -0.5268280506134033, + "rewards_train/rejected": -0.9137626886367798, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.290282726287842, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -69.55030822753906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4274657666683197, + "rewards_train/margins": 3.1275650560855865, + "rewards_train/rejected": -3.5550308227539062, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -14.011344909667969, + "logps_train/ref_chosen": -9.75, + "logps_train/ref_rejected": -13.0625, + "logps_train/rejected": -22.70452880859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.42613449692726135, + "rewards_train/margins": 0.5380683839321136, + "rewards_train/rejected": -0.964202880859375, + "step": 2406 + }, + { + "epoch": 0.67, + "logps_train/chosen": -26.958654403686523, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -36.307960510253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.383365511894226, + "rewards_train/margins": 0.9786804914474487, + "rewards_train/rejected": -2.362046003341675, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -8.252928733825684, + "logps_train/ref_chosen": -7.625, + "logps_train/ref_rejected": -12.8125, + "logps_train/rejected": -33.930381774902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.06279287487268448, + "rewards_train/margins": 2.0489953979849815, + "rewards_train/rejected": -2.111788272857666, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -173.79296875, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -303.37628173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.879296779632568, + "rewards_train/margins": 9.558331966400146, + "rewards_train/rejected": -14.437628746032715, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -34.44225311279297, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -71.22172546386719, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8067253828048706, + "rewards_train/margins": 3.427947163581848, + "rewards_train/rejected": -5.234672546386719, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -61.34864807128906, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -51.17588806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.05986487865448, + "rewards_train/margins": 1.5952240228652954, + "rewards_train/rejected": -2.6550889015197754, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -46.99164581298828, + "logps_train/ref_chosen": -11.9375, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -45.515689849853516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5054147243499756, + "rewards_train/margins": 0.3086543083190918, + "rewards_train/rejected": -3.8140690326690674, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -147.6627197265625, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -164.19985961914062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2162721157073975, + "rewards_train/margins": 0.8537137508392334, + "rewards_train/rejected": -4.069985866546631, + "step": 2407 + }, + { + "epoch": 0.67, + "logps_train/chosen": -22.5797176361084, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -30.75, + "logps_train/rejected": -51.10129165649414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6329717636108398, + "rewards_train/margins": 0.4021575450897217, + "rewards_train/rejected": -2.0351293087005615, + "step": 2407 + }, + { + "epoch": 0.67, + "learning_rate": 7.395748379873335e-09, + "loss": 0.3079, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -86.49967956542969, + "logps_train/ref_chosen": -56.0, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -168.52627563476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0499680042266846, + "rewards_train/margins": 5.8526599407196045, + "rewards_train/rejected": -8.902627944946289, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -148.61758422851562, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -250.26931762695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.51175856590271, + "rewards_train/margins": 8.71517300605774, + "rewards_train/rejected": -12.22693157196045, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -59.96194076538086, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -24.875, + "logps_train/rejected": -67.38072967529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.03369402885437, + "rewards_train/margins": 1.21687912940979, + "rewards_train/rejected": -4.25057315826416, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -33.066322326660156, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -12.9375, + "logps_train/rejected": -20.030893325805664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8566322326660156, + "rewards_train/margins": -0.14729291200637817, + "rewards_train/rejected": -0.7093393206596375, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -187.42874145507812, + "logps_train/ref_chosen": -165.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -171.85401916503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2428741455078125, + "rewards_train/margins": 2.8425278663635254, + "rewards_train/rejected": -5.085402011871338, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -43.53901672363281, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -46.86417007446289, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2414016723632812, + "rewards_train/margins": 0.8637654781341553, + "rewards_train/rejected": -3.1051671504974365, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -46.030181884765625, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -61.317840576171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6280181407928467, + "rewards_train/margins": 1.1787660121917725, + "rewards_train/rejected": -3.806784152984619, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -135.945556640625, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -225.1536865234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.0445556640625, + "rewards_train/margins": 5.070813179016113, + "rewards_train/rejected": -9.115368843078613, + "step": 2408 + }, + { + "epoch": 0.67, + "logps_train/chosen": -89.66787719726562, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -18.84640884399414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36678773164749146, + "rewards_train/margins": 1.2506657242774963, + "rewards_train/rejected": -1.6174534559249878, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -170.6967315673828, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -274.52325439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.369673252105713, + "rewards_train/margins": 7.882652759552002, + "rewards_train/rejected": -13.252326011657715, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -175.03195190429688, + "logps_train/ref_chosen": -107.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -186.10861206054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.753195285797119, + "rewards_train/margins": 1.1576662063598633, + "rewards_train/rejected": -7.910861492156982, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -105.70994567871094, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -106.1108169555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1709945648908615, + "rewards_train/margins": 0.04008713364601135, + "rewards_train/rejected": -0.21108169853687286, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -12.249310493469238, + "logps_train/ref_chosen": -5.96875, + "logps_train/ref_rejected": -3.15625, + "logps_train/rejected": -23.98179054260254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6280560493469238, + "rewards_train/margins": 1.454498052597046, + "rewards_train/rejected": -2.0825541019439697, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.774941921234131, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -2.265625, + "logps_train/rejected": -8.454315185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11499419063329697, + "rewards_train/margins": 0.5038748160004616, + "rewards_train/rejected": -0.6188690066337585, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -113.8950424194336, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -177.75497436523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5395042896270752, + "rewards_train/margins": 5.685993432998657, + "rewards_train/rejected": -7.225497722625732, + "step": 2409 + }, + { + "epoch": 0.67, + "logps_train/chosen": -117.3153305053711, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -153.30258178710938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6315330862998962, + "rewards_train/margins": 3.348725140094757, + "rewards_train/rejected": -3.9802582263946533, + "step": 2409 + }, + { + "epoch": 0.67, + "learning_rate": 7.07806500906094e-09, + "loss": 0.2269, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -176.02687072753906, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -223.34259033203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.552687168121338, + "rewards_train/margins": 2.481571674346924, + "rewards_train/rejected": -10.034258842468262, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.906754016876221, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -26.491743087768555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4297378957271576, + "rewards_train/margins": 0.2194364368915558, + "rewards_train/rejected": -0.6491743326187134, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -74.96382904052734, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -49.75, + "logps_train/rejected": -89.62973022460938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9963829517364502, + "rewards_train/margins": 1.9915900230407715, + "rewards_train/rejected": -3.9879729747772217, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -6.848236560821533, + "logps_train/ref_chosen": -4.0625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -17.23090171813965, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2785736620426178, + "rewards_train/margins": 0.569516509771347, + "rewards_train/rejected": -0.8480901718139648, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -137.32798767089844, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -172.0, + "logps_train/rejected": -179.37869262695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.13279877603054047, + "rewards_train/margins": 0.605070486664772, + "rewards_train/rejected": -0.7378692626953125, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -133.81674194335938, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -202.06947326660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9316742420196533, + "rewards_train/margins": 7.425273656845093, + "rewards_train/rejected": -10.356947898864746, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -119.97714233398438, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -195.90753173828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5477142333984375, + "rewards_train/margins": 4.043039321899414, + "rewards_train/rejected": -8.590753555297852, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -99.0228042602539, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -154.23809814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5022804141044617, + "rewards_train/margins": 4.321529686450958, + "rewards_train/rejected": -4.82381010055542, + "step": 2410 + }, + { + "epoch": 0.67, + "logps_train/chosen": -36.81046676635742, + "logps_train/ref_chosen": -24.0, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -69.51940155029297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.281046748161316, + "rewards_train/margins": 1.0208934545516968, + "rewards_train/rejected": -2.3019402027130127, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -110.49478149414062, + "logps_train/ref_chosen": -71.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -110.55809783935547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8994781970977783, + "rewards_train/margins": -1.3936684131622314, + "rewards_train/rejected": -2.505809783935547, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -129.84207153320312, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -196.09490966796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.384207248687744, + "rewards_train/margins": 5.175283908843994, + "rewards_train/rejected": -10.559491157531738, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -127.72761535644531, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -221.46726989746094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.322761535644531, + "rewards_train/margins": 4.323966026306152, + "rewards_train/rejected": -8.646727561950684, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -37.215702056884766, + "logps_train/ref_chosen": -26.75, + "logps_train/ref_rejected": -22.75, + "logps_train/rejected": -47.33946228027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0465701818466187, + "rewards_train/margins": 1.412376046180725, + "rewards_train/rejected": -2.4589462280273438, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -42.38066864013672, + "logps_train/ref_chosen": -12.0625, + "logps_train/ref_rejected": -8.125, + "logps_train/rejected": -51.6029052734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0318169593811035, + "rewards_train/margins": 1.3159737586975098, + "rewards_train/rejected": -4.347790718078613, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -11.484428405761719, + "logps_train/ref_chosen": -5.65625, + "logps_train/ref_rejected": -6.4375, + "logps_train/rejected": -15.32091999053955, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5828178524971008, + "rewards_train/margins": 0.30552417039871216, + "rewards_train/rejected": -0.888342022895813, + "step": 2411 + }, + { + "epoch": 0.67, + "logps_train/chosen": -4.885074615478516, + "logps_train/ref_chosen": -0.85546875, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -20.67352294921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4029605984687805, + "rewards_train/margins": 0.9206417202949524, + "rewards_train/rejected": -1.323602318763733, + "step": 2411 + }, + { + "epoch": 0.67, + "learning_rate": 6.767331039057133e-09, + "loss": 0.3123, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -12.817717552185059, + "logps_train/ref_chosen": -0.72265625, + "logps_train/ref_rejected": -0.72265625, + "logps_train/rejected": -12.94183349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2095061540603638, + "rewards_train/margins": 0.01241159439086914, + "rewards_train/rejected": -1.221917748451233, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -12.838114738464355, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -10.375, + "logps_train/rejected": -21.45486831665039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4713114798069, + "rewards_train/margins": 0.6366753280162811, + "rewards_train/rejected": -1.1079868078231812, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -167.71707153320312, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -231.06344604492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.27170729637146, + "rewards_train/margins": 5.9346373081207275, + "rewards_train/rejected": -8.206344604492188, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -33.26476287841797, + "logps_train/ref_chosen": -11.75, + "logps_train/ref_rejected": -24.5, + "logps_train/rejected": -58.20633316040039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1514763832092285, + "rewards_train/margins": 1.2191569805145264, + "rewards_train/rejected": -3.370633363723755, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -76.47509765625, + "logps_train/ref_chosen": -60.25, + "logps_train/ref_rejected": -56.75, + "logps_train/rejected": -82.39702606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6225098371505737, + "rewards_train/margins": 0.9421929121017456, + "rewards_train/rejected": -2.5647027492523193, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -17.39920425415039, + "logps_train/ref_chosen": -10.75, + "logps_train/ref_rejected": -21.75, + "logps_train/rejected": -37.468624114990234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.664920449256897, + "rewards_train/margins": 0.9069420099258423, + "rewards_train/rejected": -1.5718624591827393, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -111.28819274902344, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -36.253395080566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4788193702697754, + "rewards_train/margins": -1.990979790687561, + "rewards_train/rejected": -1.4878395795822144, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -175.11270141601562, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -194.71499633789062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.611270427703857, + "rewards_train/margins": 2.810229778289795, + "rewards_train/rejected": -10.421500205993652, + "step": 2412 + }, + { + "epoch": 0.67, + "logps_train/chosen": -37.17371368408203, + "logps_train/ref_chosen": -24.5, + "logps_train/ref_rejected": -1.8046875, + "logps_train/rejected": -21.41400909423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.267371416091919, + "rewards_train/margins": 0.6935607194900513, + "rewards_train/rejected": -1.9609321355819702, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -28.549226760864258, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -99.65269470214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8674226999282837, + "rewards_train/margins": 6.435346961021423, + "rewards_train/rejected": -7.302769660949707, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -46.507972717285156, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -7.75, + "logps_train/rejected": -36.36476516723633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.2757973670959473, + "rewards_train/margins": -0.414320707321167, + "rewards_train/rejected": -2.8614766597747803, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -25.19696617126465, + "logps_train/ref_chosen": -1.7265625, + "logps_train/ref_rejected": -4.75, + "logps_train/rejected": -26.367931365966797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3470404148101807, + "rewards_train/margins": -0.18524718284606934, + "rewards_train/rejected": -2.1617932319641113, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -2.887568712234497, + "logps_train/ref_chosen": -1.8203125, + "logps_train/ref_rejected": -21.375, + "logps_train/rejected": -30.012313842773438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.10672562569379807, + "rewards_train/margins": 0.7570057585835457, + "rewards_train/rejected": -0.8637313842773438, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -193.13623046875, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -173.6815185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.813622951507568, + "rewards_train/margins": 0.95452880859375, + "rewards_train/rejected": -6.768151760101318, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -61.512794494628906, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -4.84375, + "logps_train/rejected": -14.572983741760254, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.051279451698064804, + "rewards_train/margins": 0.9216439463198185, + "rewards_train/rejected": -0.9729233980178833, + "step": 2413 + }, + { + "epoch": 0.67, + "logps_train/chosen": -11.965818405151367, + "logps_train/ref_chosen": -1.3671875, + "logps_train/ref_rejected": -2.328125, + "logps_train/rejected": -13.30030632019043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0598630905151367, + "rewards_train/margins": 0.03735506534576416, + "rewards_train/rejected": -1.0972181558609009, + "step": 2413 + }, + { + "epoch": 0.67, + "learning_rate": 6.463548644670136e-09, + "loss": 0.5039, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -10.510600090026855, + "logps_train/ref_chosen": -3.53125, + "logps_train/ref_rejected": -8.3125, + "logps_train/rejected": -44.074432373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6979350447654724, + "rewards_train/margins": 2.8782582879066467, + "rewards_train/rejected": -3.576193332672119, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -8.345085144042969, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -6.34375, + "logps_train/rejected": -11.372106552124023, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3563835322856903, + "rewards_train/margins": 0.1464521586894989, + "rewards_train/rejected": -0.5028356909751892, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -74.78750610351562, + "logps_train/ref_chosen": -50.75, + "logps_train/ref_rejected": -7.40625, + "logps_train/rejected": -32.006900787353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4037506580352783, + "rewards_train/margins": 0.05631446838378906, + "rewards_train/rejected": -2.4600651264190674, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -85.58191680908203, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -75.5, + "logps_train/rejected": -109.45137023925781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.758191704750061, + "rewards_train/margins": 1.636945366859436, + "rewards_train/rejected": -3.395137071609497, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -172.28762817382812, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -126.5, + "logps_train/rejected": -177.92489624023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.828763008117676, + "rewards_train/margins": 0.31372690200805664, + "rewards_train/rejected": -5.142489910125732, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -108.93775177001953, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -67.5, + "logps_train/rejected": -112.89544677734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2937753200531006, + "rewards_train/margins": 2.2457692623138428, + "rewards_train/rejected": -4.539544582366943, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -264.57867431640625, + "logps_train/ref_chosen": -210.0, + "logps_train/ref_rejected": -175.0, + "logps_train/rejected": -232.75379943847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.457867622375488, + "rewards_train/margins": 0.31751251220703125, + "rewards_train/rejected": -5.7753801345825195, + "step": 2414 + }, + { + "epoch": 0.67, + "logps_train/chosen": -34.279541015625, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -180.0, + "logps_train/rejected": -243.7836456298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3904541730880737, + "rewards_train/margins": 4.9879103899002075, + "rewards_train/rejected": -6.378364562988281, + "step": 2414 + }, + { + "epoch": 0.68, + "logps_train/chosen": -177.0771942138672, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -132.29554748535156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.657719612121582, + "rewards_train/margins": -2.1781647205352783, + "rewards_train/rejected": -3.4795548915863037, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -108.57083129882812, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -77.5, + "logps_train/rejected": -129.9750518798828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3570832014083862, + "rewards_train/margins": 3.890421986579895, + "rewards_train/rejected": -5.247505187988281, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -27.454418182373047, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -2.890625, + "logps_train/rejected": -29.122386932373047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9391918182373047, + "rewards_train/margins": 0.6839845180511475, + "rewards_train/rejected": -2.623176336288452, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -118.08729553222656, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -151.22103881835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.558729648590088, + "rewards_train/margins": 4.3133745193481445, + "rewards_train/rejected": -6.872104167938232, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -90.97334289550781, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -72.5, + "logps_train/rejected": -112.14236450195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5973342657089233, + "rewards_train/margins": 2.366902232170105, + "rewards_train/rejected": -3.9642364978790283, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -115.54656219482422, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -106.69902038574219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.254656195640564, + "rewards_train/margins": 0.5152459144592285, + "rewards_train/rejected": -1.7699021100997925, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -4.927787780761719, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -28.381256103515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.061528779566287994, + "rewards_train/margins": 2.440659426152706, + "rewards_train/rejected": -2.502188205718994, + "step": 2415 + }, + { + "epoch": 0.68, + "logps_train/chosen": -9.53746223449707, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -29.125, + "logps_train/rejected": -45.93511199951172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5865587592124939, + "rewards_train/margins": 1.094452440738678, + "rewards_train/rejected": -1.6810111999511719, + "step": 2415 + }, + { + "epoch": 0.68, + "learning_rate": 6.1667199520547555e-09, + "loss": 0.3992, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -128.73280334472656, + "logps_train/ref_chosen": -123.5, + "logps_train/ref_rejected": -90.5, + "logps_train/rejected": -121.9133071899414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5232803225517273, + "rewards_train/margins": 2.6180503964424133, + "rewards_train/rejected": -3.1413307189941406, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -39.4788703918457, + "logps_train/ref_chosen": -13.0625, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -34.0218505859375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.641637086868286, + "rewards_train/margins": -1.2394520044326782, + "rewards_train/rejected": -1.402185082435608, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -4.322413921356201, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -5.40625, + "logps_train/rejected": -22.500478744506836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.005258608143776655, + "rewards_train/margins": 1.714681554120034, + "rewards_train/rejected": -1.7094229459762573, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -4.065748691558838, + "logps_train/ref_chosen": -2.390625, + "logps_train/ref_rejected": -5.09375, + "logps_train/rejected": -8.271933555603027, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16751237213611603, + "rewards_train/margins": 0.15030600130558014, + "rewards_train/rejected": -0.31781837344169617, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -132.4364013671875, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -184.0, + "logps_train/rejected": -242.4840087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6936402320861816, + "rewards_train/margins": 3.1547608375549316, + "rewards_train/rejected": -5.848401069641113, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -56.3493537902832, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -11.6875, + "logps_train/rejected": -50.06428527832031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.597435474395752, + "rewards_train/margins": 0.24024319648742676, + "rewards_train/rejected": -3.8376786708831787, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -1.027199149131775, + "logps_train/ref_chosen": -0.11865234375, + "logps_train/ref_rejected": -0.11865234375, + "logps_train/rejected": -1.0929871797561646, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09085468202829361, + "rewards_train/margins": 0.006578803062438965, + "rewards_train/rejected": -0.09743348509073257, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -207.953125, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -211.87998962402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.895312786102295, + "rewards_train/margins": 0.29268646240234375, + "rewards_train/rejected": -6.187999248504639, + "step": 2416 + }, + { + "epoch": 0.68, + "logps_train/chosen": -19.72389030456543, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -6.96875, + "logps_train/rejected": -9.583476066589355, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.009889030829071999, + "rewards_train/margins": 0.251583581790328, + "rewards_train/rejected": -0.2614726126194, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -23.226503372192383, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -9.5, + "logps_train/rejected": -27.512052536010742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3101503551006317, + "rewards_train/margins": 1.4910549223423004, + "rewards_train/rejected": -1.8012052774429321, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -100.10431671142578, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -161.386962890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.46043166518211365, + "rewards_train/margins": 5.12826481461525, + "rewards_train/rejected": -5.588696479797363, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -18.75571060180664, + "logps_train/ref_chosen": -1.75, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -25.213998794555664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.700571060180664, + "rewards_train/margins": -0.17292118072509766, + "rewards_train/rejected": -1.5276498794555664, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -121.73945617675781, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -134.68826293945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.873945713043213, + "rewards_train/margins": 1.7448806762695312, + "rewards_train/rejected": -5.618826389312744, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -125.63288879394531, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -195.0325469970703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0132888555526733, + "rewards_train/margins": 3.2899657487869263, + "rewards_train/rejected": -4.3032546043396, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -230.83607482910156, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -193.0, + "logps_train/rejected": -284.605224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.383607387542725, + "rewards_train/margins": 3.7769150733947754, + "rewards_train/rejected": -9.1605224609375, + "step": 2417 + }, + { + "epoch": 0.68, + "logps_train/chosen": -241.26248168945312, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -199.0, + "logps_train/rejected": -262.2390441894531, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.426248073577881, + "rewards_train/margins": -0.10234355926513672, + "rewards_train/rejected": -6.323904514312744, + "step": 2417 + }, + { + "epoch": 0.68, + "learning_rate": 5.876847038696953e-09, + "loss": 0.4222, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -153.2227325439453, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -137.24234008789062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.772273302078247, + "rewards_train/margins": -0.7980391979217529, + "rewards_train/rejected": -2.974234104156494, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -166.6282501220703, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -212.12185668945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.962825298309326, + "rewards_train/margins": 1.899360179901123, + "rewards_train/rejected": -8.86218547821045, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -26.16939353942871, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -40.0, + "logps_train/rejected": -91.71878051757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.129439353942871, + "rewards_train/margins": 4.042438983917236, + "rewards_train/rejected": -5.171878337860107, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -50.733734130859375, + "logps_train/ref_chosen": -34.25, + "logps_train/ref_rejected": -15.375, + "logps_train/rejected": -51.00813293457031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6483734846115112, + "rewards_train/margins": 1.9149397611618042, + "rewards_train/rejected": -3.5633132457733154, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -16.43482208251953, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -8.5, + "logps_train/rejected": -19.56932258605957, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2622322142124176, + "rewards_train/margins": 0.8447000682353973, + "rewards_train/rejected": -1.106932282447815, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -46.839927673339844, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -38.43893051147461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.821492910385132, + "rewards_train/margins": 0.3224000930786133, + "rewards_train/rejected": -3.143893003463745, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -84.23436737060547, + "logps_train/ref_chosen": -80.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -162.3463592529297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.37343674898147583, + "rewards_train/margins": 5.411199271678925, + "rewards_train/rejected": -5.7846360206604, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -113.36337280273438, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -164.4599151611328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3863372802734375, + "rewards_train/margins": 1.659654140472412, + "rewards_train/rejected": -4.04599142074585, + "step": 2418 + }, + { + "epoch": 0.68, + "logps_train/chosen": -145.51905822753906, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -236.60452270507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.101905822753906, + "rewards_train/margins": 6.2085466384887695, + "rewards_train/rejected": -14.310452461242676, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -184.3317108154297, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -183.02239990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.433171033859253, + "rewards_train/margins": 1.3690688610076904, + "rewards_train/rejected": -4.802239894866943, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -74.1739730834961, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -72.0, + "logps_train/rejected": -138.26507568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3173973560333252, + "rewards_train/margins": 5.309110403060913, + "rewards_train/rejected": -6.626507759094238, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -226.3306427001953, + "logps_train/ref_chosen": -177.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -147.893310546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.9330644607543945, + "rewards_train/margins": -1.7937333583831787, + "rewards_train/rejected": -3.139331102371216, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -70.87689208984375, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -70.80158996582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.8623108267784119, + "rewards_train/margins": -0.00753021240234375, + "rewards_train/rejected": 0.8698410391807556, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -30.909914016723633, + "logps_train/ref_chosen": -24.125, + "logps_train/ref_rejected": -59.75, + "logps_train/rejected": -92.1469955444336, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6784914135932922, + "rewards_train/margins": 2.561208188533783, + "rewards_train/rejected": -3.239699602127075, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -161.17433166503906, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -160.90289306640625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.6174333095550537, + "rewards_train/margins": -0.02714395523071289, + "rewards_train/rejected": -2.590289354324341, + "step": 2419 + }, + { + "epoch": 0.68, + "logps_train/chosen": -151.75450134277344, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -200.0787353515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0754501819610596, + "rewards_train/margins": 5.832423448562622, + "rewards_train/rejected": -7.907873630523682, + "step": 2419 + }, + { + "epoch": 0.68, + "learning_rate": 5.593931933399853e-09, + "loss": 0.3879, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -37.97584915161133, + "logps_train/ref_chosen": -20.875, + "logps_train/ref_rejected": -3.625, + "logps_train/rejected": -26.663171768188477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7100849151611328, + "rewards_train/margins": 0.5937323570251465, + "rewards_train/rejected": -2.3038172721862793, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.689708709716797, + "logps_train/ref_chosen": -4.84375, + "logps_train/ref_rejected": -2.28125, + "logps_train/rejected": -10.937997817993164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5845958590507507, + "rewards_train/margins": 0.28107893466949463, + "rewards_train/rejected": -0.8656747937202454, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -0.025570731610059738, + "logps_train/ref_chosen": -0.1923828125, + "logps_train/ref_rejected": -0.1923828125, + "logps_train/rejected": -0.027188187465071678, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.016681207343935966, + "rewards_train/margins": 0.00016174465417861938, + "rewards_train/rejected": 0.016519462689757347, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -142.1025390625, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -151.02626037597656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.810253858566284, + "rewards_train/margins": -0.2576277256011963, + "rewards_train/rejected": -3.552626132965088, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -19.75986099243164, + "logps_train/ref_chosen": -17.75, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -24.510639190673828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2009861022233963, + "rewards_train/margins": 0.18757782876491547, + "rewards_train/rejected": -0.38856393098831177, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -15.729719161987305, + "logps_train/ref_chosen": -5.40625, + "logps_train/ref_rejected": -1.8828125, + "logps_train/rejected": -15.264686584472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0323469638824463, + "rewards_train/margins": 0.30584049224853516, + "rewards_train/rejected": -1.3381874561309814, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -81.00730895996094, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -163.51490783691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6507309675216675, + "rewards_train/margins": 2.6507598161697388, + "rewards_train/rejected": -4.301490783691406, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -23.718610763549805, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -4.1875, + "logps_train/rejected": -31.54326820373535, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8656110763549805, + "rewards_train/margins": 1.8699657917022705, + "rewards_train/rejected": -2.735576868057251, + "step": 2420 + }, + { + "epoch": 0.68, + "logps_train/chosen": -19.398475646972656, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -26.20167350769043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2617225646972656, + "rewards_train/margins": 0.8896949291229248, + "rewards_train/rejected": -2.1514174938201904, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -82.065673828125, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -114.0, + "logps_train/rejected": -140.59335327148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.806567370891571, + "rewards_train/margins": 1.8527680039405823, + "rewards_train/rejected": -2.6593353748321533, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -1.3305261135101318, + "logps_train/ref_chosen": -0.78125, + "logps_train/ref_rejected": -8.8125, + "logps_train/rejected": -9.699382781982422, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.05492761358618736, + "rewards_train/margins": 0.033760663121938705, + "rewards_train/rejected": -0.08868827670812607, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -12.5066499710083, + "logps_train/ref_chosen": -7.71875, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -18.61922264099121, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4787900149822235, + "rewards_train/margins": -0.048117756843566895, + "rewards_train/rejected": -0.4306722581386566, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -9.804238319396973, + "logps_train/ref_chosen": -3.4375, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -17.697776794433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6366738677024841, + "rewards_train/margins": 0.6956037878990173, + "rewards_train/rejected": -1.3322776556015015, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -108.61711120605469, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -125.5, + "logps_train/rejected": -173.37240600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.311711072921753, + "rewards_train/margins": 1.475529432296753, + "rewards_train/rejected": -4.787240505218506, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -108.82115173339844, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -169.671630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8321151733398438, + "rewards_train/margins": 4.335048198699951, + "rewards_train/rejected": -6.167163372039795, + "step": 2421 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.658432006835938, + "logps_train/ref_chosen": -1.53125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -29.397075653076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9127182364463806, + "rewards_train/margins": 1.3332393765449524, + "rewards_train/rejected": -2.245957612991333, + "step": 2421 + }, + { + "epoch": 0.68, + "learning_rate": 5.317976616269315e-09, + "loss": 0.4145, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -145.47608947753906, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -185.24349975585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.647608995437622, + "rewards_train/margins": 2.0767409801483154, + "rewards_train/rejected": -5.7243499755859375, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -155.15008544921875, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -208.9760284423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.765008449554443, + "rewards_train/margins": 0.9325942993164062, + "rewards_train/rejected": -7.69760274887085, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -38.214881896972656, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -21.0, + "logps_train/rejected": -48.634647369384766, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7964882850646973, + "rewards_train/margins": -0.03302359580993652, + "rewards_train/rejected": -2.7634646892547607, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -22.634620666503906, + "logps_train/ref_chosen": -21.875, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -23.46635627746582, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.07596206665039062, + "rewards_train/margins": -0.1543264389038086, + "rewards_train/rejected": 0.07836437225341797, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -138.0450439453125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -101.5, + "logps_train/rejected": -207.06768798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.05450439453125, + "rewards_train/margins": 6.502264976501465, + "rewards_train/rejected": -10.556769371032715, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -206.1173858642578, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -168.0, + "logps_train/rejected": -246.26675415039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.361739158630371, + "rewards_train/margins": -0.5350637435913086, + "rewards_train/rejected": -7.8266754150390625, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -23.608877182006836, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -62.677162170410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0671377182006836, + "rewards_train/margins": 4.400578498840332, + "rewards_train/rejected": -5.467716217041016, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -124.44718933105469, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -132.71749877929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5947189331054688, + "rewards_train/margins": 1.1770310401916504, + "rewards_train/rejected": -2.771749973297119, + "step": 2422 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.990777969360352, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -4.15625, + "logps_train/rejected": -17.620296478271484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7162653207778931, + "rewards_train/margins": 0.6301393508911133, + "rewards_train/rejected": -1.3464046716690063, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -43.36883544921875, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -43.71284103393555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.661883592605591, + "rewards_train/margins": 0.9969005584716797, + "rewards_train/rejected": -3.6587841510772705, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -50.92010498046875, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -46.806541442871094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.029510498046875, + "rewards_train/margins": -0.798856258392334, + "rewards_train/rejected": -3.230654239654541, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -206.21510314941406, + "logps_train/ref_chosen": -170.0, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -276.7442321777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6215102672576904, + "rewards_train/margins": 9.252912759780884, + "rewards_train/rejected": -12.874423027038574, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -74.86038970947266, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -109.07701110839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11396103352308273, + "rewards_train/margins": 0.2716621533036232, + "rewards_train/rejected": -0.15770111978054047, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -0.003259211778640747, + "logps_train/ref_chosen": -0.0654296875, + "logps_train/ref_rejected": -0.0654296875, + "logps_train/rejected": -0.0029703194741159678, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.006217047572135925, + "rewards_train/margins": -2.8889160603284836e-05, + "rewards_train/rejected": 0.00624593673273921, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -247.86325073242188, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -235.67138671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.186325073242188, + "rewards_train/margins": -0.019186019897460938, + "rewards_train/rejected": -11.167139053344727, + "step": 2423 + }, + { + "epoch": 0.68, + "logps_train/chosen": -118.64033508300781, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -182.12326049804688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.014033555984497, + "rewards_train/margins": 2.9982926845550537, + "rewards_train/rejected": -4.012326240539551, + "step": 2423 + }, + { + "epoch": 0.68, + "learning_rate": 5.048983018699826e-09, + "loss": 0.4459, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -60.912296295166016, + "logps_train/ref_chosen": -35.5, + "logps_train/ref_rejected": -36.25, + "logps_train/rejected": -62.98555374145508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.541229724884033, + "rewards_train/margins": 0.1323256492614746, + "rewards_train/rejected": -2.673555374145508, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.521713256835938, + "logps_train/ref_chosen": -12.875, + "logps_train/ref_rejected": -23.375, + "logps_train/rejected": -45.25162887573242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7646713256835938, + "rewards_train/margins": 1.4229915142059326, + "rewards_train/rejected": -2.1876628398895264, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -48.45329284667969, + "logps_train/ref_chosen": -31.75, + "logps_train/ref_rejected": -25.875, + "logps_train/rejected": -64.83990478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6703293323516846, + "rewards_train/margins": 2.226161241531372, + "rewards_train/rejected": -3.8964905738830566, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -130.5848388671875, + "logps_train/ref_chosen": -99.0, + "logps_train/ref_rejected": -99.5, + "logps_train/rejected": -130.45126342773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1584839820861816, + "rewards_train/margins": -0.06335759162902832, + "rewards_train/rejected": -3.0951263904571533, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -158.5970001220703, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -195.9729461669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.6596999168396, + "rewards_train/margins": 3.4875950813293457, + "rewards_train/rejected": -8.147294998168945, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -119.65170288085938, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -207.31195068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.615170478820801, + "rewards_train/margins": 4.866024971008301, + "rewards_train/rejected": -9.481195449829102, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -261.33740234375, + "logps_train/ref_chosen": -158.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -287.93670654296875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.333740234375, + "rewards_train/margins": 1.3599309921264648, + "rewards_train/rejected": -11.693671226501465, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -33.32661437988281, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -37.0, + "logps_train/rejected": -66.26683044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.988911509513855, + "rewards_train/margins": 0.9377716779708862, + "rewards_train/rejected": -2.926683187484741, + "step": 2424 + }, + { + "epoch": 0.68, + "logps_train/chosen": -106.5325927734375, + "logps_train/ref_chosen": -80.0, + "logps_train/ref_rejected": -83.5, + "logps_train/rejected": -106.73309326171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.65325927734375, + "rewards_train/margins": -0.32994985580444336, + "rewards_train/rejected": -2.3233094215393066, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -88.87464141845703, + "logps_train/ref_chosen": -73.5, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -89.10466003417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5374641418457031, + "rewards_train/margins": 2.9730019569396973, + "rewards_train/rejected": -4.5104660987854, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -11.98023509979248, + "logps_train/ref_chosen": -7.03125, + "logps_train/ref_rejected": -7.5, + "logps_train/rejected": -10.056608200073242, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4948985278606415, + "rewards_train/margins": -0.2392376959323883, + "rewards_train/rejected": -0.2556608319282532, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.71836280822754, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -27.525602340698242, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.909336268901825, + "rewards_train/margins": 1.576036512851715, + "rewards_train/rejected": -2.48537278175354, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -23.922420501708984, + "logps_train/ref_chosen": -20.125, + "logps_train/ref_rejected": -11.625, + "logps_train/rejected": -35.5361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3797420561313629, + "rewards_train/margins": 2.011371225118637, + "rewards_train/rejected": -2.39111328125, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -272.9028625488281, + "logps_train/ref_chosen": -156.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -233.80789184570312, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -11.690286636352539, + "rewards_train/margins": -1.0094976425170898, + "rewards_train/rejected": -10.68078899383545, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -95.1108627319336, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -95.01875305175781, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5610862970352173, + "rewards_train/margins": -0.009210944175720215, + "rewards_train/rejected": -1.551875352859497, + "step": 2425 + }, + { + "epoch": 0.68, + "logps_train/chosen": -185.20211791992188, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -185.98036193847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.620212078094482, + "rewards_train/margins": 1.4278244972229004, + "rewards_train/rejected": -9.048036575317383, + "step": 2425 + }, + { + "epoch": 0.68, + "learning_rate": 4.786953023361518e-09, + "loss": 0.4098, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.543451309204102, + "logps_train/ref_chosen": -2.640625, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -45.66408920288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.790282666683197, + "rewards_train/margins": 2.7698763012886047, + "rewards_train/rejected": -3.5601589679718018, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -126.36247253417969, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -140.0, + "logps_train/rejected": -253.3203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6862472295761108, + "rewards_train/margins": 9.64578402042389, + "rewards_train/rejected": -11.33203125, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -21.040128707885742, + "logps_train/ref_chosen": -11.3125, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -32.491024017333984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9727628827095032, + "rewards_train/margins": 0.7263395190238953, + "rewards_train/rejected": -1.6991024017333984, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -98.1609878540039, + "logps_train/ref_chosen": -62.75, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -42.033058166503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5410988330841064, + "rewards_train/margins": -2.000293016433716, + "rewards_train/rejected": -1.5408058166503906, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -127.71389770507812, + "logps_train/ref_chosen": -118.5, + "logps_train/ref_rejected": -107.0, + "logps_train/rejected": -128.5206298828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9213897585868835, + "rewards_train/margins": 1.230673372745514, + "rewards_train/rejected": -2.1520631313323975, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -7.731696128845215, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -6.03125, + "logps_train/rejected": -18.46097183227539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4981696307659149, + "rewards_train/margins": 0.7448026239871979, + "rewards_train/rejected": -1.2429722547531128, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -53.376976013183594, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -66.0, + "logps_train/rejected": -91.49664306640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.037697602063417435, + "rewards_train/margins": 2.5119666568934917, + "rewards_train/rejected": -2.549664258956909, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -115.11277770996094, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -102.5, + "logps_train/rejected": -132.3157958984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1112778186798096, + "rewards_train/margins": 1.8703017234802246, + "rewards_train/rejected": -2.981579542160034, + "step": 2426 + }, + { + "epoch": 0.68, + "logps_train/chosen": -65.54661560058594, + "logps_train/ref_chosen": -39.75, + "logps_train/ref_rejected": -48.0, + "logps_train/rejected": -108.93484497070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5796616077423096, + "rewards_train/margins": 3.5138227939605713, + "rewards_train/rejected": -6.093484401702881, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -18.698246002197266, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -22.959930419921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2760746479034424, + "rewards_train/margins": 0.1261683702468872, + "rewards_train/rejected": -1.4022430181503296, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -68.28074645996094, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -53.0, + "logps_train/rejected": -95.3489761352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5030746459960938, + "rewards_train/margins": 3.731822967529297, + "rewards_train/rejected": -4.234897613525391, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -139.03225708007812, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -155.0, + "logps_train/rejected": -256.41143798828125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.253225803375244, + "rewards_train/margins": 7.887917995452881, + "rewards_train/rejected": -10.141143798828125, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -28.243078231811523, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -3.25, + "logps_train/rejected": -15.212671279907227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9493077993392944, + "rewards_train/margins": -0.7530406713485718, + "rewards_train/rejected": -1.1962671279907227, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -24.039392471313477, + "logps_train/ref_chosen": -2.015625, + "logps_train/ref_rejected": -4.34375, + "logps_train/rejected": -20.002687454223633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2023768424987793, + "rewards_train/margins": -0.6364830732345581, + "rewards_train/rejected": -1.5658937692642212, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.511106491088867, + "logps_train/ref_chosen": -7.1875, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -29.549463272094727, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3323606252670288, + "rewards_train/margins": -0.8524142801761627, + "rewards_train/rejected": -0.4799463450908661, + "step": 2427 + }, + { + "epoch": 0.68, + "logps_train/chosen": -104.08546447753906, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -150.95399475097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9585464000701904, + "rewards_train/margins": 0.13685321807861328, + "rewards_train/rejected": -3.0953996181488037, + "step": 2427 + }, + { + "epoch": 0.68, + "learning_rate": 4.531888464186506e-09, + "loss": 0.511, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -17.31311798095703, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -22.308391571044922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2250618040561676, + "rewards_train/margins": 1.6557774245738983, + "rewards_train/rejected": -1.880839228630066, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -63.620201110839844, + "logps_train/ref_chosen": -61.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -130.75668334960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2620201110839844, + "rewards_train/margins": 1.9636483192443848, + "rewards_train/rejected": -2.225668430328369, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -19.45848274230957, + "logps_train/ref_chosen": -3.671875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -37.89698791503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5786608457565308, + "rewards_train/margins": 1.3360379934310913, + "rewards_train/rejected": -2.914698839187622, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -6.249260902404785, + "logps_train/ref_chosen": -4.8125, + "logps_train/ref_rejected": -1.15625, + "logps_train/rejected": -21.293094635009766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.14367608726024628, + "rewards_train/margins": 1.870008423924446, + "rewards_train/rejected": -2.0136845111846924, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -200.42401123046875, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -112.0, + "logps_train/rejected": -202.14141845703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.792401313781738, + "rewards_train/margins": 1.22174072265625, + "rewards_train/rejected": -9.014142036437988, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -9.667299270629883, + "logps_train/ref_chosen": -3.8125, + "logps_train/ref_rejected": -4.78125, + "logps_train/rejected": -28.965017318725586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5854799151420593, + "rewards_train/margins": 1.8328967690467834, + "rewards_train/rejected": -2.4183766841888428, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -35.36226272583008, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -49.40033721923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2862262725830078, + "rewards_train/margins": 1.6538074016571045, + "rewards_train/rejected": -2.9400336742401123, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.5161075592041, + "logps_train/ref_chosen": -4.78125, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -33.660980224609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5734857320785522, + "rewards_train/margins": 0.5176123380661011, + "rewards_train/rejected": -2.0910980701446533, + "step": 2428 + }, + { + "epoch": 0.68, + "logps_train/chosen": -50.01793670654297, + "logps_train/ref_chosen": -47.5, + "logps_train/ref_rejected": -20.5, + "logps_train/rejected": -68.31128692626953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25179368257522583, + "rewards_train/margins": 4.529335200786591, + "rewards_train/rejected": -4.781128883361816, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -4.224052429199219, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -8.625, + "logps_train/rejected": -7.063493251800537, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": 0.008844757452607155, + "rewards_train/margins": -0.14730592630803585, + "rewards_train/rejected": 0.156150683760643, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -111.19688415527344, + "logps_train/ref_chosen": -82.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -198.22601318359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8696885108947754, + "rewards_train/margins": 4.9029130935668945, + "rewards_train/rejected": -7.77260160446167, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -70.53360748291016, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -139.39596557617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0033607482910156, + "rewards_train/margins": 0.686235785484314, + "rewards_train/rejected": -1.6895965337753296, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.35002899169922, + "logps_train/ref_chosen": -5.0, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -16.9422664642334, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.5350029468536377, + "rewards_train/margins": -0.5939012765884399, + "rewards_train/rejected": -0.9411016702651978, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -34.771121978759766, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -12.6875, + "logps_train/rejected": -39.3521614074707, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3708622455596924, + "rewards_train/margins": 0.29560399055480957, + "rewards_train/rejected": -2.666466236114502, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -53.0380859375, + "logps_train/ref_chosen": -33.75, + "logps_train/ref_rejected": -46.5, + "logps_train/rejected": -61.582420349121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.928808569908142, + "rewards_train/margins": -0.4205665588378906, + "rewards_train/rejected": -1.5082420110702515, + "step": 2429 + }, + { + "epoch": 0.68, + "logps_train/chosen": -142.9969482421875, + "logps_train/ref_chosen": -104.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -124.25769805908203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8496949672698975, + "rewards_train/margins": 0.676074743270874, + "rewards_train/rejected": -4.5257697105407715, + "step": 2429 + }, + { + "epoch": 0.68, + "learning_rate": 4.28379112635624e-09, + "loss": 0.3658, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -115.92237091064453, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -222.83599853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7922371029853821, + "rewards_train/margins": 7.5913631319999695, + "rewards_train/rejected": -8.383600234985352, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -118.235595703125, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -96.0, + "logps_train/rejected": -191.61587524414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8735597133636475, + "rewards_train/margins": 6.688027620315552, + "rewards_train/rejected": -9.5615873336792, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -43.767250061035156, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -12.1875, + "logps_train/rejected": -50.06623077392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4204750061035156, + "rewards_train/margins": 0.3673980236053467, + "rewards_train/rejected": -3.7878730297088623, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -7.228580474853516, + "logps_train/ref_chosen": -2.515625, + "logps_train/ref_rejected": -14.5625, + "logps_train/rejected": -39.6278076171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.471295565366745, + "rewards_train/margins": 2.035235196352005, + "rewards_train/rejected": -2.50653076171875, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -50.32122039794922, + "logps_train/ref_chosen": -12.8125, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -45.92435073852539, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7508721351623535, + "rewards_train/margins": -0.20218706130981445, + "rewards_train/rejected": -3.548685073852539, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -123.82730102539062, + "logps_train/ref_chosen": -109.5, + "logps_train/ref_rejected": -61.25, + "logps_train/rejected": -96.07157897949219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4327300786972046, + "rewards_train/margins": 2.04942786693573, + "rewards_train/rejected": -3.4821579456329346, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -34.51433563232422, + "logps_train/ref_chosen": -21.125, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -60.62187957763672, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3389335870742798, + "rewards_train/margins": 1.048254370689392, + "rewards_train/rejected": -2.387187957763672, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -7.867274761199951, + "logps_train/ref_chosen": -2.5, + "logps_train/ref_rejected": -1.71875, + "logps_train/rejected": -7.500593185424805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5367274880409241, + "rewards_train/margins": 0.04145681858062744, + "rewards_train/rejected": -0.5781843066215515, + "step": 2430 + }, + { + "epoch": 0.68, + "logps_train/chosen": -111.54939270019531, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -109.90399169921875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8049392700195312, + "rewards_train/margins": -0.16454005241394043, + "rewards_train/rejected": -1.6403992176055908, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -91.62602996826172, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -190.34762573242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.18739700317382812, + "rewards_train/margins": 8.272159576416016, + "rewards_train/rejected": -8.084762573242188, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -156.28768920898438, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -130.0, + "logps_train/rejected": -270.75152587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.528769016265869, + "rewards_train/margins": 9.546383380889893, + "rewards_train/rejected": -14.075152397155762, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -32.577728271484375, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -36.5, + "logps_train/rejected": -59.824275970458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7077728509902954, + "rewards_train/margins": 0.6246548891067505, + "rewards_train/rejected": -2.332427740097046, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -34.05509567260742, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -15.8125, + "logps_train/rejected": -50.84002685546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.455509662628174, + "rewards_train/margins": 1.0472431182861328, + "rewards_train/rejected": -3.5027527809143066, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -31.660602569580078, + "logps_train/ref_chosen": -35.25, + "logps_train/ref_rejected": -53.75, + "logps_train/rejected": -77.86431121826172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.3589397370815277, + "rewards_train/margins": 2.7703708112239838, + "rewards_train/rejected": -2.411431074142456, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -76.49281311035156, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -110.37918090820312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5992813110351562, + "rewards_train/margins": 3.088636875152588, + "rewards_train/rejected": -3.687918186187744, + "step": 2431 + }, + { + "epoch": 0.68, + "logps_train/chosen": -99.55291748046875, + "logps_train/ref_chosen": -92.0, + "logps_train/ref_rejected": -127.0, + "logps_train/rejected": -181.02601623535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.755291759967804, + "rewards_train/margins": 4.647309958934784, + "rewards_train/rejected": -5.402601718902588, + "step": 2431 + }, + { + "epoch": 0.68, + "learning_rate": 4.04266274628906e-09, + "loss": 0.2605, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -110.06365966796875, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -168.33090209960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.756365954875946, + "rewards_train/margins": 2.8767243027687073, + "rewards_train/rejected": -3.6330902576446533, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.405479431152344, + "logps_train/ref_chosen": -15.3125, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -15.798490524291992, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.5092979669570923, + "rewards_train/margins": -0.3981989100575447, + "rewards_train/rejected": -0.11109905689954758, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.909717559814453, + "logps_train/ref_chosen": -9.6875, + "logps_train/ref_rejected": -13.5, + "logps_train/rejected": -42.08748245239258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.12222176045179367, + "rewards_train/margins": 2.7365264371037483, + "rewards_train/rejected": -2.858748197555542, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -22.739896774291992, + "logps_train/ref_chosen": -7.96875, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -25.574628829956055, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4771146774291992, + "rewards_train/margins": 0.6115982532501221, + "rewards_train/rejected": -2.0887129306793213, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -125.46097564697266, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -151.90771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9460975527763367, + "rewards_train/margins": 3.244674026966095, + "rewards_train/rejected": -4.190771579742432, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -12.80167293548584, + "logps_train/ref_chosen": -10.4375, + "logps_train/ref_rejected": -10.3125, + "logps_train/rejected": -12.9237642288208, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23641729354858398, + "rewards_train/margins": 0.02470913529396057, + "rewards_train/rejected": -0.26112642884254456, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -18.614126205444336, + "logps_train/ref_chosen": -12.5625, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -21.957908630371094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6051626205444336, + "rewards_train/margins": 0.7843782901763916, + "rewards_train/rejected": -1.3895409107208252, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -19.105329513549805, + "logps_train/ref_chosen": -13.8125, + "logps_train/ref_rejected": -31.375, + "logps_train/rejected": -54.116058349609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5292829871177673, + "rewards_train/margins": 1.7448228001594543, + "rewards_train/rejected": -2.2741057872772217, + "step": 2432 + }, + { + "epoch": 0.68, + "logps_train/chosen": -25.11814308166504, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -6.875, + "logps_train/rejected": -26.882587432861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6743143200874329, + "rewards_train/margins": 1.3264445662498474, + "rewards_train/rejected": -2.0007588863372803, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -220.90475463867188, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -208.30328369140625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.590476036071777, + "rewards_train/margins": -0.7601470947265625, + "rewards_train/rejected": -8.830328941345215, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -97.68345642089844, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -265.7589416503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.068345785140991, + "rewards_train/margins": 9.107548952102661, + "rewards_train/rejected": -12.175894737243652, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -6.500088214874268, + "logps_train/ref_chosen": -5.625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -24.413570404052734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08750881999731064, + "rewards_train/margins": 0.9788481965661049, + "rewards_train/rejected": -1.0663570165634155, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -75.77195739746094, + "logps_train/ref_chosen": -60.5, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -98.05436706542969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5271958112716675, + "rewards_train/margins": 0.228240966796875, + "rewards_train/rejected": -1.7554367780685425, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -166.912353515625, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -302.118896484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2912354469299316, + "rewards_train/margins": 6.820654392242432, + "rewards_train/rejected": -10.111889839172363, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -140.73892211914062, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -111.5, + "logps_train/rejected": -149.1256866455078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1738922595977783, + "rewards_train/margins": 0.5886764526367188, + "rewards_train/rejected": -3.762568712234497, + "step": 2433 + }, + { + "epoch": 0.68, + "logps_train/chosen": -103.77073669433594, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -97.5, + "logps_train/rejected": -126.74452209472656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8770737051963806, + "rewards_train/margins": 2.0473785996437073, + "rewards_train/rejected": -2.924452304840088, + "step": 2433 + }, + { + "epoch": 0.68, + "learning_rate": 3.808505011627883e-09, + "loss": 0.3479, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -5.389885902404785, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -6.737980365753174, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.10163640975952148, + "rewards_train/margins": 0.35043445229530334, + "rewards_train/rejected": -0.24879804253578186, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -8.530817031860352, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -23.26165008544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22183170914649963, + "rewards_train/margins": 1.1105832755565643, + "rewards_train/rejected": -1.332414984703064, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -14.931100845336914, + "logps_train/ref_chosen": -14.3125, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -43.81847381591797, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.061860084533691406, + "rewards_train/margins": 2.3699872493743896, + "rewards_train/rejected": -2.431847333908081, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -77.45132446289062, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -79.26524353027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.44513246417045593, + "rewards_train/margins": 1.1063919365406036, + "rewards_train/rejected": -1.5515244007110596, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -19.661834716796875, + "logps_train/ref_chosen": -12.9375, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -23.491355895996094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6724334955215454, + "rewards_train/margins": -0.3482978940010071, + "rewards_train/rejected": -0.32413560152053833, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -141.2234344482422, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -152.59193420410156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.672343730926514, + "rewards_train/margins": 1.2368497848510742, + "rewards_train/rejected": -5.909193515777588, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -84.6834716796875, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -169.25759887695312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1683472394943237, + "rewards_train/margins": 1.2574127912521362, + "rewards_train/rejected": -2.42576003074646, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -1.7953327894210815, + "logps_train/ref_chosen": -2.96875, + "logps_train/ref_rejected": -4.375, + "logps_train/rejected": -33.173362731933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.11734171956777573, + "rewards_train/margins": 2.997178040444851, + "rewards_train/rejected": -2.879836320877075, + "step": 2434 + }, + { + "epoch": 0.68, + "logps_train/chosen": -0.7254495620727539, + "logps_train/ref_chosen": -0.546875, + "logps_train/ref_rejected": -10.0625, + "logps_train/rejected": -10.146221160888672, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.01785745657980442, + "rewards_train/margins": -0.009485340677201748, + "rewards_train/rejected": -0.008372115902602673, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.835866928100586, + "logps_train/ref_chosen": -2.9375, + "logps_train/ref_rejected": -1.3125, + "logps_train/rejected": -19.581954956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7898367643356323, + "rewards_train/margins": 0.037108778953552246, + "rewards_train/rejected": -1.8269455432891846, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -67.18714141845703, + "logps_train/ref_chosen": -44.75, + "logps_train/ref_rejected": -9.9375, + "logps_train/rejected": -66.16728210449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2437140941619873, + "rewards_train/margins": 3.3792641162872314, + "rewards_train/rejected": -5.622978210449219, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -15.377052307128906, + "logps_train/ref_chosen": -3.515625, + "logps_train/ref_rejected": -6.6875, + "logps_train/rejected": -19.538719177246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1861428022384644, + "rewards_train/margins": 0.09897911548614502, + "rewards_train/rejected": -1.2851219177246094, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -186.2293701171875, + "logps_train/ref_chosen": -164.0, + "logps_train/ref_rejected": -164.0, + "logps_train/rejected": -236.98887634277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2229371070861816, + "rewards_train/margins": 5.075950622558594, + "rewards_train/rejected": -7.298887729644775, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -2.4375743865966797, + "logps_train/ref_chosen": -1.1328125, + "logps_train/ref_rejected": -1.1328125, + "logps_train/rejected": -2.2809700965881348, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1304761916399002, + "rewards_train/margins": -0.015660427510738373, + "rewards_train/rejected": -0.11481576412916183, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -97.74271392822266, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -187.5282745361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4242714047431946, + "rewards_train/margins": 7.378555953502655, + "rewards_train/rejected": -7.80282735824585, + "step": 2435 + }, + { + "epoch": 0.68, + "logps_train/chosen": -13.007766723632812, + "logps_train/ref_chosen": -10.875, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -27.910097122192383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.213276669383049, + "rewards_train/margins": 1.671483114361763, + "rewards_train/rejected": -1.884759783744812, + "step": 2435 + }, + { + "epoch": 0.68, + "learning_rate": 3.5813195612287573e-09, + "loss": 0.3475, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -257.4408264160156, + "logps_train/ref_chosen": -226.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -187.63479614257812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.14408278465271, + "rewards_train/margins": -0.4806032180786133, + "rewards_train/rejected": -2.6634795665740967, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -140.23504638671875, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -137.45660400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.623504638671875, + "rewards_train/margins": 1.9221558570861816, + "rewards_train/rejected": -4.545660495758057, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -29.10493278503418, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -10.8125, + "logps_train/rejected": -44.03640365600586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.885493278503418, + "rewards_train/margins": 1.4368970394134521, + "rewards_train/rejected": -3.32239031791687, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -101.12452697753906, + "logps_train/ref_chosen": -95.5, + "logps_train/ref_rejected": -88.5, + "logps_train/rejected": -120.06523132324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5624527335166931, + "rewards_train/margins": 2.5940704941749573, + "rewards_train/rejected": -3.1565232276916504, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -107.18585205078125, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -35.5, + "logps_train/rejected": -86.9443359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.931414783000946, + "rewards_train/margins": 6.075848281383514, + "rewards_train/rejected": -5.144433498382568, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -133.7392120361328, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -171.22207641601562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8239212036132812, + "rewards_train/margins": 3.948286533355713, + "rewards_train/rejected": -5.772207736968994, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -22.034027099609375, + "logps_train/ref_chosen": -5.8125, + "logps_train/ref_rejected": -18.875, + "logps_train/rejected": -28.348546981811523, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6221526861190796, + "rewards_train/margins": -0.6747979521751404, + "rewards_train/rejected": -0.9473547339439392, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -192.01263427734375, + "logps_train/ref_chosen": -143.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -258.21630859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.90126371383667, + "rewards_train/margins": 1.720367431640625, + "rewards_train/rejected": -6.621631145477295, + "step": 2436 + }, + { + "epoch": 0.68, + "logps_train/chosen": -44.22005844116211, + "logps_train/ref_chosen": -30.375, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -60.1114501953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3845058679580688, + "rewards_train/margins": 0.9516392946243286, + "rewards_train/rejected": -2.3361451625823975, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -199.6215057373047, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -214.127197265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.062150478363037, + "rewards_train/margins": 2.0005698204040527, + "rewards_train/rejected": -9.06272029876709, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -21.103199005126953, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -25.4620418548584, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2103198766708374, + "rewards_train/margins": 0.31088435649871826, + "rewards_train/rejected": -1.5212042331695557, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -103.31869506835938, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -115.5, + "logps_train/rejected": -176.79095458984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5318695306777954, + "rewards_train/margins": 4.597226023674011, + "rewards_train/rejected": -6.129095554351807, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -15.458073616027832, + "logps_train/ref_chosen": -16.75, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -20.302995681762695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12919263541698456, + "rewards_train/margins": 0.7282422035932541, + "rewards_train/rejected": -0.5990495681762695, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -14.047160148620605, + "logps_train/ref_chosen": -9.9375, + "logps_train/ref_rejected": -4.53125, + "logps_train/rejected": -10.810461044311523, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41096600890159607, + "rewards_train/margins": 0.21695509552955627, + "rewards_train/rejected": -0.6279211044311523, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -121.8213119506836, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -151.5747528076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1821311712265015, + "rewards_train/margins": 0.5753440856933594, + "rewards_train/rejected": -1.7574752569198608, + "step": 2437 + }, + { + "epoch": 0.68, + "logps_train/chosen": -51.291744232177734, + "logps_train/ref_chosen": -41.75, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -58.60821533203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9541744589805603, + "rewards_train/margins": 2.594147026538849, + "rewards_train/rejected": -3.548321485519409, + "step": 2437 + }, + { + "epoch": 0.68, + "learning_rate": 3.361107985148881e-09, + "loss": 0.3233, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -221.69248962402344, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -217.09754943847656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.269248962402344, + "rewards_train/margins": -0.2594938278198242, + "rewards_train/rejected": -6.0097551345825195, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -66.19220733642578, + "logps_train/ref_chosen": -45.0, + "logps_train/ref_rejected": -4.9375, + "logps_train/rejected": -36.666107177734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.119220733642578, + "rewards_train/margins": 1.0536401271820068, + "rewards_train/rejected": -3.172860860824585, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -146.16439819335938, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -105.5, + "logps_train/rejected": -139.71636962890625, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.8164398670196533, + "rewards_train/margins": -0.3948028087615967, + "rewards_train/rejected": -3.4216370582580566, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -190.02154541015625, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -130.44668579101562, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.50215482711792, + "rewards_train/margins": -2.25748610496521, + "rewards_train/rejected": -3.24466872215271, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -92.86833190917969, + "logps_train/ref_chosen": -76.5, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -95.45875549316406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.6368331909179688, + "rewards_train/margins": -0.6409576535224915, + "rewards_train/rejected": -0.9958755373954773, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -118.30432891845703, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -139.27587890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5804328918457031, + "rewards_train/margins": 3.0471551418304443, + "rewards_train/rejected": -3.6275880336761475, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -157.07012939453125, + "logps_train/ref_chosen": -97.5, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -174.11199951171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.957013130187988, + "rewards_train/margins": 1.10418701171875, + "rewards_train/rejected": -7.061200141906738, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -13.389140129089355, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -53.032371520996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11391401290893555, + "rewards_train/margins": 0.4143231511116028, + "rewards_train/rejected": -0.5282371640205383, + "step": 2438 + }, + { + "epoch": 0.68, + "logps_train/chosen": -166.21783447265625, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -183.9642333984375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.421783447265625, + "rewards_train/margins": 2.6746397018432617, + "rewards_train/rejected": -8.096423149108887, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -24.664796829223633, + "logps_train/ref_chosen": -10.25, + "logps_train/ref_rejected": -10.25, + "logps_train/rejected": -24.57071304321289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4414796829223633, + "rewards_train/margins": -0.009408354759216309, + "rewards_train/rejected": -1.432071328163147, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -25.93508529663086, + "logps_train/ref_chosen": -4.375, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -25.782337188720703, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.15600848197937, + "rewards_train/margins": -0.2558997869491577, + "rewards_train/rejected": -1.9001086950302124, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -23.195234298706055, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -2.921875, + "logps_train/rejected": -27.752267837524414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3882734775543213, + "rewards_train/margins": 1.0947659015655518, + "rewards_train/rejected": -2.483039379119873, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -46.31200408935547, + "logps_train/ref_chosen": -27.75, + "logps_train/ref_rejected": -27.75, + "logps_train/rejected": -46.70644760131836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8562004566192627, + "rewards_train/margins": 0.03944432735443115, + "rewards_train/rejected": -1.8956447839736938, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -187.36492919921875, + "logps_train/ref_chosen": -114.0, + "logps_train/ref_rejected": -163.0, + "logps_train/rejected": -301.0238037109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.336493015289307, + "rewards_train/margins": 6.465887546539307, + "rewards_train/rejected": -13.802380561828613, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -155.14456176757812, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -174.96200561523438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.3644561767578125, + "rewards_train/margins": 1.0317444801330566, + "rewards_train/rejected": -5.396200656890869, + "step": 2439 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.489423751831055, + "logps_train/ref_chosen": -5.125, + "logps_train/ref_rejected": -7.25, + "logps_train/rejected": -28.187881469726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5364423990249634, + "rewards_train/margins": 1.5573457479476929, + "rewards_train/rejected": -2.0937881469726562, + "step": 2439 + }, + { + "epoch": 0.68, + "learning_rate": 3.147871824635717e-09, + "loss": 0.5848, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -30.86174774169922, + "logps_train/ref_chosen": -13.4375, + "logps_train/ref_rejected": -2.703125, + "logps_train/rejected": -32.004600524902344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7424248456954956, + "rewards_train/margins": 1.1877228021621704, + "rewards_train/rejected": -2.930147647857666, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -220.8616943359375, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -243.33059692382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.986169338226318, + "rewards_train/margins": 5.296890735626221, + "rewards_train/rejected": -12.283060073852539, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -57.08868408203125, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -206.67343139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.583868384361267, + "rewards_train/margins": 8.633474946022034, + "rewards_train/rejected": -10.2173433303833, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -31.988622665405273, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -58.87495040893555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8488622903823853, + "rewards_train/margins": 2.0886327028274536, + "rewards_train/rejected": -3.937494993209839, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -127.76351928710938, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -170.91900634765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7263519763946533, + "rewards_train/margins": 3.865548849105835, + "rewards_train/rejected": -6.591900825500488, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -137.64051818847656, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -92.0, + "logps_train/rejected": -133.8716278076172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6640517711639404, + "rewards_train/margins": 0.52311110496521, + "rewards_train/rejected": -4.18716287612915, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -43.869197845458984, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -31.289371490478516, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8744198083877563, + "rewards_train/margins": -1.0704826712608337, + "rewards_train/rejected": -0.8039371371269226, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -174.3016357421875, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -134.4564971923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.23016357421875, + "rewards_train/margins": 0.11548614501953125, + "rewards_train/rejected": -2.3456497192382812, + "step": 2440 + }, + { + "epoch": 0.68, + "logps_train/chosen": -113.05414581298828, + "logps_train/ref_chosen": -89.0, + "logps_train/ref_rejected": -73.0, + "logps_train/rejected": -95.52202606201172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.405414581298828, + "rewards_train/margins": -0.1532118320465088, + "rewards_train/rejected": -2.2522027492523193, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -107.31214904785156, + "logps_train/ref_chosen": -76.0, + "logps_train/ref_rejected": -8.5625, + "logps_train/rejected": -40.27305603027344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1312148571014404, + "rewards_train/margins": 0.0398406982421875, + "rewards_train/rejected": -3.171055555343628, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -18.751609802246094, + "logps_train/ref_chosen": -5.6875, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -16.381437301635742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3064110279083252, + "rewards_train/margins": 0.048920273780822754, + "rewards_train/rejected": -1.355331301689148, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -26.060733795166016, + "logps_train/ref_chosen": -19.375, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -19.373943328857422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6685733795166016, + "rewards_train/margins": -0.22492903470993042, + "rewards_train/rejected": -0.44364434480667114, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -115.43692779541016, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -250.57669067382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.093692779541016, + "rewards_train/margins": 7.663976669311523, + "rewards_train/rejected": -11.757669448852539, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -152.08360290527344, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -280.50103759765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.058360576629639, + "rewards_train/margins": 9.291743755340576, + "rewards_train/rejected": -13.350104331970215, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -5.041569709777832, + "logps_train/ref_chosen": -2.609375, + "logps_train/ref_rejected": -6.625, + "logps_train/rejected": -22.71820831298828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24321947991847992, + "rewards_train/margins": 1.366101399064064, + "rewards_train/rejected": -1.609320878982544, + "step": 2441 + }, + { + "epoch": 0.68, + "logps_train/chosen": -21.789634704589844, + "logps_train/ref_chosen": -18.125, + "logps_train/ref_rejected": -18.125, + "logps_train/rejected": -31.650108337402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36646348237991333, + "rewards_train/margins": 0.9860473275184631, + "rewards_train/rejected": -1.3525108098983765, + "step": 2441 + }, + { + "epoch": 0.68, + "learning_rate": 2.9416125721164477e-09, + "loss": 0.3965, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -26.86676788330078, + "logps_train/ref_chosen": -18.625, + "logps_train/ref_rejected": -5.15625, + "logps_train/rejected": -3.817401647567749, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8241767883300781, + "rewards_train/margins": -0.958061620593071, + "rewards_train/rejected": 0.13388483226299286, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -177.64125061035156, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -159.0, + "logps_train/rejected": -293.14849853515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.964125156402588, + "rewards_train/margins": 6.450725078582764, + "rewards_train/rejected": -13.414850234985352, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -1.7230881452560425, + "logps_train/ref_chosen": -3.21875, + "logps_train/ref_rejected": -6.15625, + "logps_train/rejected": -8.47433853149414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.149566188454628, + "rewards_train/margins": 0.3813750445842743, + "rewards_train/rejected": -0.2318088561296463, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -59.90918731689453, + "logps_train/ref_chosen": -49.5, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -118.63446044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0409187078475952, + "rewards_train/margins": 3.822527527809143, + "rewards_train/rejected": -4.863446235656738, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -30.378211975097656, + "logps_train/ref_chosen": -11.4375, + "logps_train/ref_rejected": -23.0, + "logps_train/rejected": -79.7559814453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8940712213516235, + "rewards_train/margins": 3.7815269231796265, + "rewards_train/rejected": -5.67559814453125, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -92.74549102783203, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -209.94454956054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8245491981506348, + "rewards_train/margins": 6.669906139373779, + "rewards_train/rejected": -9.494455337524414, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -164.7940673828125, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -182.75994873046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.379406690597534, + "rewards_train/margins": 0.6965882778167725, + "rewards_train/rejected": -4.075994968414307, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -202.10562133789062, + "logps_train/ref_chosen": -157.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -220.55349731445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.510562419891357, + "rewards_train/margins": 5.894787311553955, + "rewards_train/rejected": -10.405349731445312, + "step": 2442 + }, + { + "epoch": 0.68, + "logps_train/chosen": -21.476119995117188, + "logps_train/ref_chosen": -16.875, + "logps_train/ref_rejected": -14.25, + "logps_train/rejected": -15.769311904907227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4601120054721832, + "rewards_train/margins": -0.3081808090209961, + "rewards_train/rejected": -0.15193119645118713, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -11.49059772491455, + "logps_train/ref_chosen": -2.75, + "logps_train/ref_rejected": -17.375, + "logps_train/rejected": -44.228416442871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.874059796333313, + "rewards_train/margins": 1.8112818002700806, + "rewards_train/rejected": -2.6853415966033936, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -12.54221248626709, + "logps_train/ref_chosen": -1.84375, + "logps_train/ref_rejected": -4.875, + "logps_train/rejected": -12.053176879882812, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.069846272468567, + "rewards_train/margins": -0.3520285487174988, + "rewards_train/rejected": -0.7178177237510681, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -110.66914367675781, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -147.47134399414062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2669143676757812, + "rewards_train/margins": 3.2802200317382812, + "rewards_train/rejected": -6.5471343994140625, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -86.51783752441406, + "logps_train/ref_chosen": -59.75, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -150.31060791015625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.676783800125122, + "rewards_train/margins": 1.8542768955230713, + "rewards_train/rejected": -4.531060695648193, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -166.11087036132812, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -121.0, + "logps_train/rejected": -182.85435485839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.011086940765381, + "rewards_train/margins": 1.1743488311767578, + "rewards_train/rejected": -6.185435771942139, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -63.562496185302734, + "logps_train/ref_chosen": -63.25, + "logps_train/ref_rejected": -14.75, + "logps_train/rejected": -31.627243041992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.031249618157744408, + "rewards_train/margins": 1.6564747337251902, + "rewards_train/rejected": -1.6877243518829346, + "step": 2443 + }, + { + "epoch": 0.68, + "logps_train/chosen": -212.87185668945312, + "logps_train/ref_chosen": -138.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -215.60154724121094, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.487185955047607, + "rewards_train/margins": -0.9270310401916504, + "rewards_train/rejected": -6.560154914855957, + "step": 2443 + }, + { + "epoch": 0.68, + "learning_rate": 2.7423316711872037e-09, + "loss": 0.3773, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -152.094482421875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -126.0, + "logps_train/rejected": -151.83912658691406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.609448194503784, + "rewards_train/margins": -0.02553558349609375, + "rewards_train/rejected": -2.5839126110076904, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -165.4608154296875, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -229.02969360351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.146081447601318, + "rewards_train/margins": 1.6568880081176758, + "rewards_train/rejected": -5.802969455718994, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -166.2506866455078, + "logps_train/ref_chosen": -126.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -251.4273223876953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9750688076019287, + "rewards_train/margins": 6.167663812637329, + "rewards_train/rejected": -10.142732620239258, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -187.03985595703125, + "logps_train/ref_chosen": -110.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -246.0023193359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.703985691070557, + "rewards_train/margins": 1.8962464332580566, + "rewards_train/rejected": -9.600232124328613, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -208.11785888671875, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -277.4237060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.611785888671875, + "rewards_train/margins": 1.630584716796875, + "rewards_train/rejected": -8.24237060546875, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -20.536209106445312, + "logps_train/ref_chosen": -2.984375, + "logps_train/ref_rejected": -5.8125, + "logps_train/rejected": -37.25166702270508, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.755183458328247, + "rewards_train/margins": 1.3887333869934082, + "rewards_train/rejected": -3.1439168453216553, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -182.9229736328125, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -188.4735107421875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -8.492297172546387, + "rewards_train/margins": -1.1449460983276367, + "rewards_train/rejected": -7.34735107421875, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -151.28407287597656, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -177.07933044433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.028407335281372, + "rewards_train/margins": 3.0295259952545166, + "rewards_train/rejected": -6.057933330535889, + "step": 2444 + }, + { + "epoch": 0.68, + "logps_train/chosen": -148.09933471679688, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -92.5, + "logps_train/rejected": -164.78916931152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0099334716796875, + "rewards_train/margins": 4.2189836502075195, + "rewards_train/rejected": -7.228917121887207, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -182.9660186767578, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -171.04055786132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.896602153778076, + "rewards_train/margins": 0.8574538230895996, + "rewards_train/rejected": -6.754055976867676, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -180.72889709472656, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -168.94821166992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.872889757156372, + "rewards_train/margins": 4.971931219100952, + "rewards_train/rejected": -8.844820976257324, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -74.90420532226562, + "logps_train/ref_chosen": -57.0, + "logps_train/ref_rejected": -44.0, + "logps_train/rejected": -76.8217544555664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7904205322265625, + "rewards_train/margins": 1.4917550086975098, + "rewards_train/rejected": -3.2821755409240723, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -192.36981201171875, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -110.5, + "logps_train/rejected": -221.24969482421875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.986981391906738, + "rewards_train/margins": 1.0879878997802734, + "rewards_train/rejected": -11.074969291687012, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -127.85306549072266, + "logps_train/ref_chosen": -122.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -195.97152709960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5853065848350525, + "rewards_train/margins": 4.911846220493317, + "rewards_train/rejected": -5.497152805328369, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -117.54173278808594, + "logps_train/ref_chosen": -91.5, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -167.07054138183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.604173421859741, + "rewards_train/margins": 3.3028810024261475, + "rewards_train/rejected": -5.907054424285889, + "step": 2445 + }, + { + "epoch": 0.68, + "logps_train/chosen": -28.424110412597656, + "logps_train/ref_chosen": -11.1875, + "logps_train/ref_rejected": -7.1875, + "logps_train/rejected": -31.669206619262695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7236610651016235, + "rewards_train/margins": 0.724509596824646, + "rewards_train/rejected": -2.4481706619262695, + "step": 2445 + }, + { + "epoch": 0.68, + "learning_rate": 2.5500305166027415e-09, + "loss": 0.2625, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.587345123291016, + "logps_train/ref_chosen": -4.09375, + "logps_train/ref_rejected": -0.73828125, + "logps_train/rejected": -4.505833148956299, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6493595242500305, + "rewards_train/margins": -0.2726043164730072, + "rewards_train/rejected": -0.3767552077770233, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -134.44931030273438, + "logps_train/ref_chosen": -115.5, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -273.56787109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8949310779571533, + "rewards_train/margins": 10.561856031417847, + "rewards_train/rejected": -12.456787109375, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -11.070816040039062, + "logps_train/ref_chosen": -1.375, + "logps_train/ref_rejected": -1.2421875, + "logps_train/rejected": -9.08775520324707, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9695816040039062, + "rewards_train/margins": -0.18502479791641235, + "rewards_train/rejected": -0.7845568060874939, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -64.44108581542969, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -63.75, + "logps_train/rejected": -125.4817886352539, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4816086292266846, + "rewards_train/margins": 2.691570520401001, + "rewards_train/rejected": -6.1731791496276855, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -182.8728485107422, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -130.8653106689453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0872849225997925, + "rewards_train/margins": 0.24924612045288086, + "rewards_train/rejected": -1.3365310430526733, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -104.25276184082031, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -192.77822875976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.725276231765747, + "rewards_train/margins": 3.802546739578247, + "rewards_train/rejected": -6.527822971343994, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -170.29457092285156, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -158.2689208984375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.479457378387451, + "rewards_train/margins": -0.8525652885437012, + "rewards_train/rejected": -3.62689208984375, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -212.69027709960938, + "logps_train/ref_chosen": -163.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -184.9383087158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.969027996063232, + "rewards_train/margins": 2.674802780151367, + "rewards_train/rejected": -7.6438307762146, + "step": 2446 + }, + { + "epoch": 0.68, + "logps_train/chosen": -187.88534545898438, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -137.31231689453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.288534641265869, + "rewards_train/margins": 0.19269704818725586, + "rewards_train/rejected": -5.481231689453125, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -21.762117385864258, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -1.1015625, + "logps_train/rejected": -13.149504661560059, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8762117624282837, + "rewards_train/margins": 0.3285825252532959, + "rewards_train/rejected": -1.2047942876815796, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -32.45956802368164, + "logps_train/ref_chosen": -17.5, + "logps_train/ref_rejected": -13.6875, + "logps_train/rejected": -33.58213806152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4959567785263062, + "rewards_train/margins": 0.4935070276260376, + "rewards_train/rejected": -1.9894638061523438, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -45.46430969238281, + "logps_train/ref_chosen": -23.625, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -149.0472869873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1839311122894287, + "rewards_train/margins": 1.9707977771759033, + "rewards_train/rejected": -4.154728889465332, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -147.662109375, + "logps_train/ref_chosen": -111.5, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -210.90293884277344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6162109375, + "rewards_train/margins": 2.4740829467773438, + "rewards_train/rejected": -6.090293884277344, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -157.37216186523438, + "logps_train/ref_chosen": -119.5, + "logps_train/ref_rejected": -121.5, + "logps_train/rejected": -246.83523559570312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.7872161865234375, + "rewards_train/margins": 8.746307373046875, + "rewards_train/rejected": -12.533523559570312, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -35.609718322753906, + "logps_train/ref_chosen": -22.5, + "logps_train/ref_rejected": -17.25, + "logps_train/rejected": -58.672821044921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3109718561172485, + "rewards_train/margins": 2.8313101530075073, + "rewards_train/rejected": -4.142282009124756, + "step": 2447 + }, + { + "epoch": 0.68, + "logps_train/chosen": -32.30398941040039, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -16.75, + "logps_train/rejected": -35.66191101074219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5678989291191101, + "rewards_train/margins": 1.3232921957969666, + "rewards_train/rejected": -1.8911911249160767, + "step": 2447 + }, + { + "epoch": 0.68, + "learning_rate": 2.36471045426756e-09, + "loss": 0.3558, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -184.587158203125, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -139.0, + "logps_train/rejected": -186.46987915039062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.858716011047363, + "rewards_train/margins": -0.11172819137573242, + "rewards_train/rejected": -4.746987819671631, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -2.1314451694488525, + "logps_train/ref_chosen": -0.9453125, + "logps_train/ref_rejected": -2.203125, + "logps_train/rejected": -1.8071327209472656, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.11861326545476913, + "rewards_train/margins": -0.15821249410510063, + "rewards_train/rejected": 0.0395992286503315, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -156.53652954101562, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -234.00962829589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.953652858734131, + "rewards_train/margins": 5.597310543060303, + "rewards_train/rejected": -12.550963401794434, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -89.17109680175781, + "logps_train/ref_chosen": -61.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -132.56607055664062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7671096324920654, + "rewards_train/margins": 3.0894973278045654, + "rewards_train/rejected": -5.856606960296631, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -169.24862670898438, + "logps_train/ref_chosen": -144.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -169.71449279785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.524862766265869, + "rewards_train/margins": 0.04658651351928711, + "rewards_train/rejected": -2.5714492797851562, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -120.2154312133789, + "logps_train/ref_chosen": -102.0, + "logps_train/ref_rejected": -70.5, + "logps_train/rejected": -110.19679260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8215430974960327, + "rewards_train/margins": 2.148136258125305, + "rewards_train/rejected": -3.969679355621338, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -286.71002197265625, + "logps_train/ref_chosen": -188.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -288.16094970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.871002197265625, + "rewards_train/margins": 2.8450927734375, + "rewards_train/rejected": -12.716094970703125, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -210.46365356445312, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -208.89344787597656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.346365451812744, + "rewards_train/margins": 3.0429797172546387, + "rewards_train/rejected": -9.389345169067383, + "step": 2448 + }, + { + "epoch": 0.68, + "logps_train/chosen": -197.01187133789062, + "logps_train/ref_chosen": -140.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -206.90940856933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.7011871337890625, + "rewards_train/margins": 0.5897536277770996, + "rewards_train/rejected": -6.290940761566162, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -4.793186187744141, + "logps_train/ref_chosen": -5.1875, + "logps_train/ref_rejected": -8.0625, + "logps_train/rejected": -17.323835372924805, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.039431381970644, + "rewards_train/margins": 0.9655649550259113, + "rewards_train/rejected": -0.9261335730552673, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -126.5619888305664, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -110.0, + "logps_train/rejected": -122.11271667480469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7561988830566406, + "rewards_train/margins": 0.4550727605819702, + "rewards_train/rejected": -1.2112716436386108, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -28.058998107910156, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -11.375, + "logps_train/rejected": -34.83221435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2183998823165894, + "rewards_train/margins": 1.1273216009140015, + "rewards_train/rejected": -2.345721483230591, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -97.43489074707031, + "logps_train/ref_chosen": -90.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -111.3221206665039, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.7434890866279602, + "rewards_train/margins": -1.211277037858963, + "rewards_train/rejected": 0.4677879512310028, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -9.771994590759277, + "logps_train/ref_chosen": -15.4375, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -19.3911075592041, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.5665505528450012, + "rewards_train/margins": 1.6869112849235535, + "rewards_train/rejected": -1.1203607320785522, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -30.380847930908203, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -107.99520874023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4005848169326782, + "rewards_train/margins": 2.898936152458191, + "rewards_train/rejected": -4.299520969390869, + "step": 2449 + }, + { + "epoch": 0.68, + "logps_train/chosen": -33.733985900878906, + "logps_train/ref_chosen": -20.0, + "logps_train/ref_rejected": -1.640625, + "logps_train/rejected": -18.1119384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3733986616134644, + "rewards_train/margins": 0.27373266220092773, + "rewards_train/rejected": -1.647131323814392, + "step": 2449 + }, + { + "epoch": 0.68, + "learning_rate": 2.186372781225465e-09, + "loss": 0.3908, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -146.85028076171875, + "logps_train/ref_chosen": -117.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -178.12425231933594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.935028076171875, + "rewards_train/margins": 4.477397441864014, + "rewards_train/rejected": -7.412425518035889, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -4.465160846710205, + "logps_train/ref_chosen": -0.8046875, + "logps_train/ref_rejected": -16.25, + "logps_train/rejected": -37.53074645996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.36604735255241394, + "rewards_train/margins": 1.7620272934436798, + "rewards_train/rejected": -2.1280746459960938, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -12.11948013305664, + "logps_train/ref_chosen": -8.0625, + "logps_train/ref_rejected": -15.9375, + "logps_train/rejected": -17.778667449951172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4056980311870575, + "rewards_train/margins": -0.22158128023147583, + "rewards_train/rejected": -0.18411675095558167, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -70.95282745361328, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -120.15348815917969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.045282747596502304, + "rewards_train/margins": 2.6200660206377506, + "rewards_train/rejected": -2.665348768234253, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -90.34405517578125, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -34.75, + "logps_train/rejected": -80.4344711303711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.43440580368042, + "rewards_train/margins": 0.13404130935668945, + "rewards_train/rejected": -4.568447113037109, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -122.76630401611328, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -167.67959594726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.476630449295044, + "rewards_train/margins": 5.2913291454315186, + "rewards_train/rejected": -7.7679595947265625, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -53.865882873535156, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -12.875, + "logps_train/rejected": -54.27040100097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4365882873535156, + "rewards_train/margins": 2.7029519081115723, + "rewards_train/rejected": -4.139540195465088, + "step": 2450 + }, + { + "epoch": 0.68, + "logps_train/chosen": -10.884089469909668, + "logps_train/ref_chosen": -11.875, + "logps_train/ref_rejected": -4.09375, + "logps_train/rejected": -14.937176704406738, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0990910530090332, + "rewards_train/margins": 1.1834337711334229, + "rewards_train/rejected": -1.0843427181243896, + "step": 2450 + }, + { + "epoch": 0.69, + "logps_train/chosen": -39.53672409057617, + "logps_train/ref_chosen": -11.8125, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -39.24058532714844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7724225521087646, + "rewards_train/margins": -0.0421140193939209, + "rewards_train/rejected": -2.7303085327148438, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -197.65196228027344, + "logps_train/ref_chosen": -153.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -208.171142578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.465196132659912, + "rewards_train/margins": 1.8519182205200195, + "rewards_train/rejected": -6.317114353179932, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -13.450902938842773, + "logps_train/ref_chosen": -2.53125, + "logps_train/ref_rejected": -6.0, + "logps_train/rejected": -12.581161499023438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.0919653177261353, + "rewards_train/margins": -0.43384915590286255, + "rewards_train/rejected": -0.6581161618232727, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -28.99064826965332, + "logps_train/ref_chosen": -2.03125, + "logps_train/ref_rejected": -3.53125, + "logps_train/rejected": -14.639780044555664, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.695939779281616, + "rewards_train/margins": -1.585086703300476, + "rewards_train/rejected": -1.1108530759811401, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -66.50611877441406, + "logps_train/ref_chosen": -37.25, + "logps_train/ref_rejected": -40.5, + "logps_train/rejected": -84.50804901123047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.925611972808838, + "rewards_train/margins": 1.4751930236816406, + "rewards_train/rejected": -4.4008049964904785, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -81.06147003173828, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -51.0, + "logps_train/rejected": -81.73634338378906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0061471462249756, + "rewards_train/margins": 0.06748723983764648, + "rewards_train/rejected": -3.073634386062622, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -18.552867889404297, + "logps_train/ref_chosen": -14.25, + "logps_train/ref_rejected": -22.375, + "logps_train/rejected": -32.17228698730469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.43028679490089417, + "rewards_train/margins": 0.5494419038295746, + "rewards_train/rejected": -0.9797286987304688, + "step": 2451 + }, + { + "epoch": 0.69, + "logps_train/chosen": -11.720123291015625, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -2.84375, + "logps_train/rejected": -8.19304370880127, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09673767536878586, + "rewards_train/margins": 0.6316670700907707, + "rewards_train/rejected": -0.5349293947219849, + "step": 2451 + }, + { + "epoch": 0.69, + "learning_rate": 2.0150187456510205e-09, + "loss": 0.458, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -153.58291625976562, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -58.75, + "logps_train/rejected": -94.36607360839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6582916975021362, + "rewards_train/margins": 1.9033156633377075, + "rewards_train/rejected": -3.5616073608398438, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -118.10517883300781, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -117.55921936035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3105179071426392, + "rewards_train/margins": -0.054595947265625, + "rewards_train/rejected": -1.2559219598770142, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -245.21067810058594, + "logps_train/ref_chosen": -159.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -242.499755859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.621068000793457, + "rewards_train/margins": 2.0289077758789062, + "rewards_train/rejected": -10.649975776672363, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -194.3779296875, + "logps_train/ref_chosen": -124.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -160.51547241210938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.037793159484863, + "rewards_train/margins": -0.1362457275390625, + "rewards_train/rejected": -6.901547431945801, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -23.035419464111328, + "logps_train/ref_chosen": -4.34375, + "logps_train/ref_rejected": -5.03125, + "logps_train/rejected": -45.42537307739258, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8691669702529907, + "rewards_train/margins": 2.1702455282211304, + "rewards_train/rejected": -4.039412498474121, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -16.774585723876953, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -7.96875, + "logps_train/rejected": -42.25584411621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1243336200714111, + "rewards_train/margins": 2.3043758869171143, + "rewards_train/rejected": -3.4287095069885254, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -17.49881935119629, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -4.6875, + "logps_train/rejected": -50.20363235473633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08761806786060333, + "rewards_train/margins": 4.639231398701668, + "rewards_train/rejected": -4.5516133308410645, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -34.13359832763672, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -7.0, + "logps_train/rejected": -41.20774841308594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1446099281311035, + "rewards_train/margins": 1.2761650085449219, + "rewards_train/rejected": -3.4207749366760254, + "step": 2452 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.3568922281265259, + "logps_train/ref_chosen": -1.0234375, + "logps_train/ref_rejected": -1.6640625, + "logps_train/rejected": -5.309840202331543, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03334547206759453, + "rewards_train/margins": 0.33123229816555977, + "rewards_train/rejected": -0.3645777702331543, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -24.798419952392578, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -21.25, + "logps_train/rejected": -28.860210418701172, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8548420071601868, + "rewards_train/margins": -0.09382092952728271, + "rewards_train/rejected": -0.761021077632904, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -160.65603637695312, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -165.3791961669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.815603733062744, + "rewards_train/margins": 1.1223158836364746, + "rewards_train/rejected": -4.937919616699219, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -6.808618545532227, + "logps_train/ref_chosen": -11.5, + "logps_train/ref_rejected": -23.125, + "logps_train/rejected": -40.71078872680664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.46913814544677734, + "rewards_train/margins": 2.2277170419692993, + "rewards_train/rejected": -1.758578896522522, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -20.440690994262695, + "logps_train/ref_chosen": -15.875, + "logps_train/ref_rejected": -0.78125, + "logps_train/rejected": -19.60135269165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.456569105386734, + "rewards_train/margins": 1.4254412353038788, + "rewards_train/rejected": -1.8820103406906128, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -144.33587646484375, + "logps_train/ref_chosen": -96.0, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -170.07968139648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.833587646484375, + "rewards_train/margins": 0.5743803977966309, + "rewards_train/rejected": -5.407968044281006, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -121.91947174072266, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -148.1583251953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.091947317123413, + "rewards_train/margins": 3.273885488510132, + "rewards_train/rejected": -5.365832805633545, + "step": 2453 + }, + { + "epoch": 0.69, + "logps_train/chosen": -6.453433990478516, + "logps_train/ref_chosen": -0.11181640625, + "logps_train/ref_rejected": -0.11181640625, + "logps_train/rejected": -6.452237606048584, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6341617703437805, + "rewards_train/margins": -0.00011962652206420898, + "rewards_train/rejected": -0.6340421438217163, + "step": 2453 + }, + { + "epoch": 0.69, + "learning_rate": 1.8506495468409988e-09, + "loss": 0.3292, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -16.53509521484375, + "logps_train/ref_chosen": -6.625, + "logps_train/ref_rejected": -5.90625, + "logps_train/rejected": -23.540687561035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.991009533405304, + "rewards_train/margins": 0.7724342942237854, + "rewards_train/rejected": -1.7634438276290894, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -14.519376754760742, + "logps_train/ref_chosen": -9.1875, + "logps_train/ref_rejected": -7.625, + "logps_train/rejected": -21.881181716918945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5331876873970032, + "rewards_train/margins": 0.8924304842948914, + "rewards_train/rejected": -1.4256181716918945, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -126.21477508544922, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -69.5, + "logps_train/rejected": -127.1190185546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.8714776039123535, + "rewards_train/margins": 0.8904242515563965, + "rewards_train/rejected": -5.76190185546875, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -195.0806121826172, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -157.0, + "logps_train/rejected": -215.26649475097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.008061408996582, + "rewards_train/margins": 0.8185882568359375, + "rewards_train/rejected": -5.8266496658325195, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -46.64208221435547, + "logps_train/ref_chosen": -25.0, + "logps_train/ref_rejected": -11.0625, + "logps_train/rejected": -44.76213836669922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.164208173751831, + "rewards_train/margins": 1.2057557106018066, + "rewards_train/rejected": -3.3699638843536377, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -163.17823791503906, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -80.5, + "logps_train/rejected": -148.43348693847656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4178237915039062, + "rewards_train/margins": 3.3755249977111816, + "rewards_train/rejected": -6.793348789215088, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -188.94590759277344, + "logps_train/ref_chosen": -115.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -233.30264282226562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.394590854644775, + "rewards_train/margins": 3.135673999786377, + "rewards_train/rejected": -10.530264854431152, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -49.829429626464844, + "logps_train/ref_chosen": -36.5, + "logps_train/ref_rejected": -37.25, + "logps_train/rejected": -61.43107986450195, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3329429626464844, + "rewards_train/margins": 1.085165023803711, + "rewards_train/rejected": -2.4181079864501953, + "step": 2454 + }, + { + "epoch": 0.69, + "logps_train/chosen": -27.97140121459961, + "logps_train/ref_chosen": -19.875, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -49.078025817871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.809640109539032, + "rewards_train/margins": 1.310662567615509, + "rewards_train/rejected": -2.120302677154541, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -174.51870727539062, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -105.80531311035156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.601871013641357, + "rewards_train/margins": -3.421339750289917, + "rewards_train/rejected": -3.1805312633514404, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -136.7948455810547, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -152.0, + "logps_train/rejected": -257.2436828613281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.529484510421753, + "rewards_train/margins": 6.99488377571106, + "rewards_train/rejected": -10.524368286132812, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -172.6287841796875, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -109.5, + "logps_train/rejected": -153.84812927246094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.962878465652466, + "rewards_train/margins": 0.47193455696105957, + "rewards_train/rejected": -4.434813022613525, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -149.72979736328125, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -147.45510864257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.472979784011841, + "rewards_train/margins": 3.8725311756134033, + "rewards_train/rejected": -6.345510959625244, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -19.216821670532227, + "logps_train/ref_chosen": -17.25, + "logps_train/ref_rejected": -5.34375, + "logps_train/rejected": -25.88420867919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1966821700334549, + "rewards_train/margins": 1.8573637455701828, + "rewards_train/rejected": -2.0540459156036377, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -12.264189720153809, + "logps_train/ref_chosen": -8.5625, + "logps_train/ref_rejected": -3.84375, + "logps_train/rejected": -4.773096561431885, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.3701689839363098, + "rewards_train/margins": -0.277234323322773, + "rewards_train/rejected": -0.09293466061353683, + "step": 2455 + }, + { + "epoch": 0.69, + "logps_train/chosen": -89.60709381103516, + "logps_train/ref_chosen": -68.0, + "logps_train/ref_rejected": -85.5, + "logps_train/rejected": -130.3402099609375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1607093811035156, + "rewards_train/margins": 2.3233118057250977, + "rewards_train/rejected": -4.484021186828613, + "step": 2455 + }, + { + "epoch": 0.69, + "learning_rate": 1.693266335205279e-09, + "loss": 0.4588, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -191.98919677734375, + "logps_train/ref_chosen": -125.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -211.5165557861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.698919773101807, + "rewards_train/margins": 0.652735710144043, + "rewards_train/rejected": -7.35165548324585, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -178.090087890625, + "logps_train/ref_chosen": -181.0, + "logps_train/ref_rejected": -135.0, + "logps_train/rejected": -151.6532440185547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2909912168979645, + "rewards_train/margins": 1.956315666437149, + "rewards_train/rejected": -1.6653244495391846, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -198.0640869140625, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -185.618408203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -8.00640869140625, + "rewards_train/margins": 0.6554327011108398, + "rewards_train/rejected": -8.66184139251709, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -27.352373123168945, + "logps_train/ref_chosen": -13.75, + "logps_train/ref_rejected": -12.75, + "logps_train/rejected": -38.51565170288086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3602373600006104, + "rewards_train/margins": 1.2163279056549072, + "rewards_train/rejected": -2.5765652656555176, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -44.595733642578125, + "logps_train/ref_chosen": -21.75, + "logps_train/ref_rejected": -38.0, + "logps_train/rejected": -60.14015579223633, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2845733165740967, + "rewards_train/margins": -0.0705575942993164, + "rewards_train/rejected": -2.2140157222747803, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -134.4555206298828, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -148.0, + "logps_train/rejected": -259.95208740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5955522060394287, + "rewards_train/margins": 8.599656343460083, + "rewards_train/rejected": -11.195208549499512, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -73.23880004882812, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -123.0, + "logps_train/rejected": -200.20968627929688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12612000107765198, + "rewards_train/margins": 7.847088724374771, + "rewards_train/rejected": -7.720968723297119, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -96.785888671875, + "logps_train/ref_chosen": -74.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -214.6815948486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.228588819503784, + "rewards_train/margins": 7.98957085609436, + "rewards_train/rejected": -10.218159675598145, + "step": 2456 + }, + { + "epoch": 0.69, + "logps_train/chosen": -29.335800170898438, + "logps_train/ref_chosen": -9.625, + "logps_train/ref_rejected": -27.0, + "logps_train/rejected": -56.01510238647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9710800647735596, + "rewards_train/margins": 0.9304301738739014, + "rewards_train/rejected": -2.901510238647461, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -178.06600952148438, + "logps_train/ref_chosen": -136.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -262.8846435546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.206601142883301, + "rewards_train/margins": 4.981863021850586, + "rewards_train/rejected": -9.188464164733887, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -4.204556465148926, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -2.59375, + "logps_train/rejected": -6.207901954650879, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0014193535316735506, + "rewards_train/margins": 0.3628345609176904, + "rewards_train/rejected": -0.36141520738601685, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -71.81713104248047, + "logps_train/ref_chosen": -43.0, + "logps_train/ref_rejected": -7.78125, + "logps_train/rejected": -29.0191593170166, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.8817131519317627, + "rewards_train/margins": -0.7579221725463867, + "rewards_train/rejected": -2.123790979385376, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -12.177464485168457, + "logps_train/ref_chosen": -12.6875, + "logps_train/ref_rejected": -15.125, + "logps_train/rejected": -23.90700340270996, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.051003552973270416, + "rewards_train/margins": 0.9292039051651955, + "rewards_train/rejected": -0.878200352191925, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -60.278724670410156, + "logps_train/ref_chosen": -47.75, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -108.88568878173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2528724670410156, + "rewards_train/margins": 5.0106964111328125, + "rewards_train/rejected": -6.263568878173828, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -78.94599151611328, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -62.0, + "logps_train/rejected": -87.43788146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.844599187374115, + "rewards_train/margins": 1.699189007282257, + "rewards_train/rejected": -2.543788194656372, + "step": 2457 + }, + { + "epoch": 0.69, + "logps_train/chosen": -98.98430633544922, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -181.7216796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.148430585861206, + "rewards_train/margins": 6.473737955093384, + "rewards_train/rejected": -8.62216854095459, + "step": 2457 + }, + { + "epoch": 0.69, + "learning_rate": 1.5428702122596283e-09, + "loss": 0.2797, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -111.28002166748047, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -160.16952514648438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7280023097991943, + "rewards_train/margins": 3.788950204849243, + "rewards_train/rejected": -6.5169525146484375, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -11.793367385864258, + "logps_train/ref_chosen": -9.3125, + "logps_train/ref_rejected": -20.0, + "logps_train/rejected": -21.16991424560547, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24808673560619354, + "rewards_train/margins": -0.1310953125357628, + "rewards_train/rejected": -0.11699142307043076, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -95.14825439453125, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -134.64797973632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.414825439453125, + "rewards_train/margins": 1.4499726295471191, + "rewards_train/rejected": -2.864798069000244, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -103.24295043945312, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -171.77084350585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.474295139312744, + "rewards_train/margins": 4.252789497375488, + "rewards_train/rejected": -6.727084636688232, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -3.3532752990722656, + "logps_train/ref_chosen": -4.5625, + "logps_train/ref_rejected": -11.125, + "logps_train/rejected": -12.270346641540527, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12092246860265732, + "rewards_train/margins": 0.2354571372270584, + "rewards_train/rejected": -0.11453466862440109, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -119.72569274902344, + "logps_train/ref_chosen": -95.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -140.9156494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.472569227218628, + "rewards_train/margins": 1.018995761871338, + "rewards_train/rejected": -3.491564989089966, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -131.38766479492188, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -120.81962585449219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6887664794921875, + "rewards_train/margins": 0.4931960105895996, + "rewards_train/rejected": -4.181962490081787, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -35.96674346923828, + "logps_train/ref_chosen": -26.625, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -35.339900970458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9341743588447571, + "rewards_train/margins": 1.2248157858848572, + "rewards_train/rejected": -2.1589901447296143, + "step": 2458 + }, + { + "epoch": 0.69, + "logps_train/chosen": -140.20396423339844, + "logps_train/ref_chosen": -112.5, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -218.2104034423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7703964710235596, + "rewards_train/margins": 9.200644254684448, + "rewards_train/rejected": -11.971040725708008, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -26.23101806640625, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -22.125, + "logps_train/rejected": -54.662994384765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.710601806640625, + "rewards_train/margins": 2.5431976318359375, + "rewards_train/rejected": -3.2537994384765625, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -27.44051742553711, + "logps_train/ref_chosen": -6.25, + "logps_train/ref_rejected": -18.375, + "logps_train/rejected": -44.63058090209961, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.119051694869995, + "rewards_train/margins": 0.5065064430236816, + "rewards_train/rejected": -2.6255581378936768, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -124.01698303222656, + "logps_train/ref_chosen": -98.0, + "logps_train/ref_rejected": -98.0, + "logps_train/rejected": -124.02223205566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.601698398590088, + "rewards_train/margins": 0.0005247592926025391, + "rewards_train/rejected": -2.6022231578826904, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -192.6064910888672, + "logps_train/ref_chosen": -94.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -210.9190216064453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.810648918151855, + "rewards_train/margins": -0.4187469482421875, + "rewards_train/rejected": -9.391901969909668, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -157.3497772216797, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -194.3272705078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.6349778175354004, + "rewards_train/margins": 1.9977493286132812, + "rewards_train/rejected": -5.632727146148682, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -22.10765838623047, + "logps_train/ref_chosen": -6.03125, + "logps_train/ref_rejected": -100.5, + "logps_train/rejected": -119.92964935302734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6076408624649048, + "rewards_train/margins": 0.3353240489959717, + "rewards_train/rejected": -1.9429649114608765, + "step": 2459 + }, + { + "epoch": 0.69, + "logps_train/chosen": -100.64450073242188, + "logps_train/ref_chosen": -94.0, + "logps_train/ref_rejected": -87.5, + "logps_train/rejected": -135.15078735351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6644501090049744, + "rewards_train/margins": 4.100628912448883, + "rewards_train/rejected": -4.765079021453857, + "step": 2459 + }, + { + "epoch": 0.69, + "learning_rate": 1.3994622306173764e-09, + "loss": 0.3426, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -61.844970703125, + "logps_train/ref_chosen": -42.0, + "logps_train/ref_rejected": -42.0, + "logps_train/rejected": -61.82862854003906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.9844970703125, + "rewards_train/margins": -0.0016342401504516602, + "rewards_train/rejected": -1.9828628301620483, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -17.314517974853516, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -3.265625, + "logps_train/rejected": -15.465963363647461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7814518213272095, + "rewards_train/margins": 0.43858206272125244, + "rewards_train/rejected": -1.220033884048462, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -24.23850440979004, + "logps_train/ref_chosen": -12.3125, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -49.40613555908203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1926004886627197, + "rewards_train/margins": -0.1769869327545166, + "rewards_train/rejected": -1.0156135559082031, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -71.58584594726562, + "logps_train/ref_chosen": -41.25, + "logps_train/ref_rejected": -47.5, + "logps_train/rejected": -77.50240325927734, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0335845947265625, + "rewards_train/margins": -0.033344268798828125, + "rewards_train/rejected": -3.0002403259277344, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -178.396728515625, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -150.0, + "logps_train/rejected": -201.2576904296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.239673137664795, + "rewards_train/margins": -0.11390399932861328, + "rewards_train/rejected": -5.125769138336182, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -96.18712615966797, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -163.29632568359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6687126159667969, + "rewards_train/margins": 4.160920143127441, + "rewards_train/rejected": -5.829632759094238, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -68.88397216796875, + "logps_train/ref_chosen": -70.5, + "logps_train/ref_rejected": -105.0, + "logps_train/rejected": -121.49267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.16160278022289276, + "rewards_train/margins": 1.8108703345060349, + "rewards_train/rejected": -1.649267554283142, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -162.04983520507812, + "logps_train/ref_chosen": -97.0, + "logps_train/ref_rejected": -45.5, + "logps_train/rejected": -71.25736999511719, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.504983425140381, + "rewards_train/margins": -3.929246425628662, + "rewards_train/rejected": -2.5757369995117188, + "step": 2460 + }, + { + "epoch": 0.69, + "logps_train/chosen": -52.901371002197266, + "logps_train/ref_chosen": -27.375, + "logps_train/ref_rejected": -50.25, + "logps_train/rejected": -108.4959945678711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.5526371002197266, + "rewards_train/margins": 3.2719626426696777, + "rewards_train/rejected": -5.824599742889404, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -115.73808288574219, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -63.25, + "logps_train/rejected": -69.10888671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.2238082885742188, + "rewards_train/margins": -0.6379196047782898, + "rewards_train/rejected": -0.585888683795929, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -96.12386322021484, + "logps_train/ref_chosen": -87.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -150.32366943359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8623863458633423, + "rewards_train/margins": 1.7199805974960327, + "rewards_train/rejected": -2.582366943359375, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -165.89691162109375, + "logps_train/ref_chosen": -109.0, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -205.6205596923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.689691066741943, + "rewards_train/margins": 4.272365093231201, + "rewards_train/rejected": -9.962056159973145, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -144.20692443847656, + "logps_train/ref_chosen": -87.0, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -250.58229064941406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.7206926345825195, + "rewards_train/margins": 6.587536811828613, + "rewards_train/rejected": -12.308229446411133, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -104.2611083984375, + "logps_train/ref_chosen": -78.5, + "logps_train/ref_rejected": -112.5, + "logps_train/rejected": -185.36209106445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.57611083984375, + "rewards_train/margins": 4.7100982666015625, + "rewards_train/rejected": -7.2862091064453125, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -142.5680389404297, + "logps_train/ref_chosen": -101.0, + "logps_train/ref_rejected": -78.0, + "logps_train/rejected": -129.29388427734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.156804084777832, + "rewards_train/margins": 0.9725842475891113, + "rewards_train/rejected": -5.129388332366943, + "step": 2461 + }, + { + "epoch": 0.69, + "logps_train/chosen": -145.64697265625, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -160.0810546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6646972894668579, + "rewards_train/margins": 0.043408215045928955, + "rewards_train/rejected": -0.7081055045127869, + "step": 2461 + }, + { + "epoch": 0.69, + "learning_rate": 1.2630433939825324e-09, + "loss": 0.6149, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -8.920553207397461, + "logps_train/ref_chosen": -5.0625, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -24.802345275878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3858053386211395, + "rewards_train/margins": 1.2194291651248932, + "rewards_train/rejected": -1.6052345037460327, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -71.61192321777344, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -181.2707977294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1361923217773438, + "rewards_train/margins": 2.7908873558044434, + "rewards_train/rejected": -4.927079677581787, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -115.09410095214844, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -149.8435821533203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.659410238265991, + "rewards_train/margins": 1.92494797706604, + "rewards_train/rejected": -4.584358215332031, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -66.46263122558594, + "logps_train/ref_chosen": -27.125, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -41.615570068359375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.933763265609741, + "rewards_train/margins": -0.6472063064575195, + "rewards_train/rejected": -3.2865569591522217, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -125.71939849853516, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -224.81988525390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.3219399452209473, + "rewards_train/margins": 6.060048580169678, + "rewards_train/rejected": -9.381988525390625, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -8.278672218322754, + "logps_train/ref_chosen": -3.828125, + "logps_train/ref_rejected": -24.25, + "logps_train/rejected": -32.335105895996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4450547397136688, + "rewards_train/margins": 0.3634558618068695, + "rewards_train/rejected": -0.8085106015205383, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -150.66700744628906, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -156.58091735839844, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.516700744628906, + "rewards_train/margins": -0.6586089134216309, + "rewards_train/rejected": -3.8580918312072754, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -35.66139221191406, + "logps_train/ref_chosen": -6.65625, + "logps_train/ref_rejected": -4.0625, + "logps_train/rejected": -24.808130264282227, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9005143642425537, + "rewards_train/margins": -0.825951337814331, + "rewards_train/rejected": -2.0745630264282227, + "step": 2462 + }, + { + "epoch": 0.69, + "logps_train/chosen": -144.57785034179688, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -131.0, + "logps_train/rejected": -145.66656494140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3577851057052612, + "rewards_train/margins": 0.1088714599609375, + "rewards_train/rejected": -1.4666565656661987, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -12.61078929901123, + "logps_train/ref_chosen": -8.4375, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -56.12209701538086, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.41732892394065857, + "rewards_train/margins": 3.819880872964859, + "rewards_train/rejected": -4.237209796905518, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -127.33208465576172, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -114.5, + "logps_train/rejected": -168.96987915039062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5832085609436035, + "rewards_train/margins": 0.8637795448303223, + "rewards_train/rejected": -5.446988105773926, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -28.054229736328125, + "logps_train/ref_chosen": -6.3125, + "logps_train/ref_rejected": -10.4375, + "logps_train/rejected": -45.88758087158203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.17417311668396, + "rewards_train/margins": 1.3708350658416748, + "rewards_train/rejected": -3.5450081825256348, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -95.75826263427734, + "logps_train/ref_chosen": -73.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -136.48190307617188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2758262157440186, + "rewards_train/margins": 4.272364377975464, + "rewards_train/rejected": -6.548190593719482, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -31.41802978515625, + "logps_train/ref_chosen": -7.09375, + "logps_train/ref_rejected": -1.8359375, + "logps_train/rejected": -25.400192260742188, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.4324281215667725, + "rewards_train/margins": -0.07600259780883789, + "rewards_train/rejected": -2.3564255237579346, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -23.741065979003906, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -11.9375, + "logps_train/rejected": -27.653560638427734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5553566217422485, + "rewards_train/margins": 0.016249418258666992, + "rewards_train/rejected": -1.5716060400009155, + "step": 2463 + }, + { + "epoch": 0.69, + "logps_train/chosen": -18.783926010131836, + "logps_train/ref_chosen": -15.1875, + "logps_train/ref_rejected": -7.15625, + "logps_train/rejected": -16.175678253173828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.3596425950527191, + "rewards_train/margins": 0.5423002541065216, + "rewards_train/rejected": -0.9019428491592407, + "step": 2463 + }, + { + "epoch": 0.69, + "learning_rate": 1.1336146571421235e-09, + "loss": 0.4655, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -227.88790893554688, + "logps_train/ref_chosen": -152.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -289.65313720703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.5887908935546875, + "rewards_train/margins": 4.0765228271484375, + "rewards_train/rejected": -11.665313720703125, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -62.78697204589844, + "logps_train/ref_chosen": -46.0, + "logps_train/ref_rejected": -53.25, + "logps_train/rejected": -88.67268371582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6786972284317017, + "rewards_train/margins": 1.863571286201477, + "rewards_train/rejected": -3.5422685146331787, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.2084851264953613, + "logps_train/ref_chosen": -2.09375, + "logps_train/ref_rejected": -2.671875, + "logps_train/rejected": -4.886959075927734, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.08852648735046387, + "rewards_train/margins": 0.3100349009037018, + "rewards_train/rejected": -0.22150841355323792, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -33.15040588378906, + "logps_train/ref_chosen": -3.9375, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -55.91790771484375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.921290636062622, + "rewards_train/margins": 0.6955001354217529, + "rewards_train/rejected": -3.616790771484375, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -27.20821762084961, + "logps_train/ref_chosen": -23.25, + "logps_train/ref_rejected": -31.125, + "logps_train/rejected": -48.47929382324219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.39582177996635437, + "rewards_train/margins": 1.3396076261997223, + "rewards_train/rejected": -1.7354294061660767, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -87.25257873535156, + "logps_train/ref_chosen": -77.0, + "logps_train/ref_rejected": -47.0, + "logps_train/rejected": -101.21713256835938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.02525794506073, + "rewards_train/margins": 4.396455407142639, + "rewards_train/rejected": -5.421713352203369, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -7.176376819610596, + "logps_train/ref_chosen": -8.125, + "logps_train/ref_rejected": -38.25, + "logps_train/rejected": -53.915283203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09486231952905655, + "rewards_train/margins": 1.6613906398415565, + "rewards_train/rejected": -1.5665283203125, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -20.26494789123535, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -36.47164535522461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6046198606491089, + "rewards_train/margins": 1.5519198179244995, + "rewards_train/rejected": -3.1565396785736084, + "step": 2464 + }, + { + "epoch": 0.69, + "logps_train/chosen": -25.027366638183594, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -13.75, + "logps_train/rejected": -42.672279357910156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2902367115020752, + "rewards_train/margins": 1.6019911766052246, + "rewards_train/rejected": -2.8922278881073, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -25.650354385375977, + "logps_train/ref_chosen": -6.53125, + "logps_train/ref_rejected": -3.109375, + "logps_train/rejected": -31.203454971313477, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9119104146957397, + "rewards_train/margins": 0.8974975347518921, + "rewards_train/rejected": -2.809407949447632, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -4.489201545715332, + "logps_train/ref_chosen": -5.78125, + "logps_train/ref_rejected": -8.0, + "logps_train/rejected": -24.556413650512695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1292048543691635, + "rewards_train/margins": 1.7848462909460068, + "rewards_train/rejected": -1.6556414365768433, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -142.8708953857422, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -147.0, + "logps_train/rejected": -256.11700439453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.937089681625366, + "rewards_train/margins": 6.9746105670928955, + "rewards_train/rejected": -10.911700248718262, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -30.56679916381836, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -21.990520477294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5441799163818359, + "rewards_train/margins": 1.06737220287323, + "rewards_train/rejected": -1.611552119255066, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -13.451196670532227, + "logps_train/ref_chosen": -9.375, + "logps_train/ref_rejected": -32.25, + "logps_train/rejected": -69.66970825195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4076196849346161, + "rewards_train/margins": 3.3343510925769806, + "rewards_train/rejected": -3.7419707775115967, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -93.38845825195312, + "logps_train/ref_chosen": -84.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -177.02362060546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9388458132743835, + "rewards_train/margins": 7.213516056537628, + "rewards_train/rejected": -8.152361869812012, + "step": 2465 + }, + { + "epoch": 0.69, + "logps_train/chosen": -139.6798858642578, + "logps_train/ref_chosen": -96.5, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -264.1234130859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.317988872528076, + "rewards_train/margins": 9.294353008270264, + "rewards_train/rejected": -13.61234188079834, + "step": 2465 + }, + { + "epoch": 0.69, + "learning_rate": 1.0111769259600888e-09, + "loss": 0.1712, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -34.52835464477539, + "logps_train/ref_chosen": -22.375, + "logps_train/ref_rejected": -9.8125, + "logps_train/rejected": -22.322166442871094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.215335488319397, + "rewards_train/margins": 0.03563117980957031, + "rewards_train/rejected": -1.2509666681289673, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -83.82345581054688, + "logps_train/ref_chosen": -65.5, + "logps_train/ref_rejected": -15.3125, + "logps_train/rejected": -26.897674560546875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8323456048965454, + "rewards_train/margins": -0.673828125, + "rewards_train/rejected": -1.1585174798965454, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -39.408302307128906, + "logps_train/ref_chosen": -35.0, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -53.1694450378418, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4408302307128906, + "rewards_train/margins": 2.0886142253875732, + "rewards_train/rejected": -2.529444456100464, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -144.1115264892578, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -203.90383911132812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.811152935028076, + "rewards_train/margins": 5.079230785369873, + "rewards_train/rejected": -10.89038372039795, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -71.83003997802734, + "logps_train/ref_chosen": -57.25, + "logps_train/ref_rejected": -23.25, + "logps_train/rejected": -79.67814636230469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4580039978027344, + "rewards_train/margins": 4.184810638427734, + "rewards_train/rejected": -5.642814636230469, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -4.8012237548828125, + "logps_train/ref_chosen": -1.828125, + "logps_train/ref_rejected": -0.70703125, + "logps_train/rejected": -3.902775764465332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.29730987548828125, + "rewards_train/margins": 0.022264569997787476, + "rewards_train/rejected": -0.3195744454860687, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -106.73736572265625, + "logps_train/ref_chosen": -83.0, + "logps_train/ref_rejected": -133.0, + "logps_train/rejected": -189.72177124023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.373736619949341, + "rewards_train/margins": 3.29844069480896, + "rewards_train/rejected": -5.672177314758301, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -3.4178225994110107, + "logps_train/ref_chosen": -1.0703125, + "logps_train/ref_rejected": -4.90625, + "logps_train/rejected": -18.11093521118164, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.23475101590156555, + "rewards_train/margins": 1.0857175290584564, + "rewards_train/rejected": -1.320468544960022, + "step": 2466 + }, + { + "epoch": 0.69, + "logps_train/chosen": -122.18023681640625, + "logps_train/ref_chosen": -100.0, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -185.90679931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2180237770080566, + "rewards_train/margins": 5.922656536102295, + "rewards_train/rejected": -8.140680313110352, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -237.55252075195312, + "logps_train/ref_chosen": -187.0, + "logps_train/ref_rejected": -171.0, + "logps_train/rejected": -291.6721496582031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.0552520751953125, + "rewards_train/margins": 7.011962890625, + "rewards_train/rejected": -12.067214965820312, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -163.23097229003906, + "logps_train/ref_chosen": -122.5, + "logps_train/ref_rejected": -116.0, + "logps_train/rejected": -214.63185119628906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.073097229003906, + "rewards_train/margins": 5.790087699890137, + "rewards_train/rejected": -9.863184928894043, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -118.13371276855469, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -95.5, + "logps_train/rejected": -165.30154418945312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7133712768554688, + "rewards_train/margins": 4.266783237457275, + "rewards_train/rejected": -6.980154514312744, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -96.671630859375, + "logps_train/ref_chosen": -69.5, + "logps_train/ref_rejected": -84.5, + "logps_train/rejected": -126.64930725097656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7171630859375, + "rewards_train/margins": 1.4977679252624512, + "rewards_train/rejected": -4.214931011199951, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -30.38919448852539, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -193.7113494873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.201419472694397, + "rewards_train/margins": 4.969715476036072, + "rewards_train/rejected": -6.171134948730469, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -118.30420684814453, + "logps_train/ref_chosen": -78.0, + "logps_train/ref_rejected": -103.5, + "logps_train/rejected": -134.19198608398438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.030420780181885, + "rewards_train/margins": -0.9612221717834473, + "rewards_train/rejected": -3.0691986083984375, + "step": 2467 + }, + { + "epoch": 0.69, + "logps_train/chosen": -159.67971801757812, + "logps_train/ref_chosen": -125.5, + "logps_train/ref_rejected": -89.0, + "logps_train/rejected": -188.2252960205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.4179718494415283, + "rewards_train/margins": 6.504558324813843, + "rewards_train/rejected": -9.922530174255371, + "step": 2467 + }, + { + "epoch": 0.69, + "learning_rate": 8.957310573707299e-10, + "loss": 0.2763, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -10.515887260437012, + "logps_train/ref_chosen": -2.578125, + "logps_train/ref_rejected": -8.375, + "logps_train/rejected": -16.95508575439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7937762141227722, + "rewards_train/margins": 0.06423234939575195, + "rewards_train/rejected": -0.8580085635185242, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -100.09791564941406, + "logps_train/ref_chosen": -93.5, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -131.55198669433594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6597915887832642, + "rewards_train/margins": 0.6954071521759033, + "rewards_train/rejected": -1.3551987409591675, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -3.3278229236602783, + "logps_train/ref_chosen": -1.6484375, + "logps_train/ref_rejected": -6.78125, + "logps_train/rejected": -12.589682579040527, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.16793854534626007, + "rewards_train/margins": 0.4129047244787216, + "rewards_train/rejected": -0.5808432698249817, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -88.13887786865234, + "logps_train/ref_chosen": -70.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -139.62969970703125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8138878345489502, + "rewards_train/margins": 2.999082326889038, + "rewards_train/rejected": -4.812970161437988, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -63.97055435180664, + "logps_train/ref_chosen": -51.5, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -68.23515319824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2470554113388062, + "rewards_train/margins": 3.601460099220276, + "rewards_train/rejected": -4.848515510559082, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -140.00311279296875, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -218.75498962402344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4003113508224487, + "rewards_train/margins": 8.425188183784485, + "rewards_train/rejected": -9.825499534606934, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -172.5673370361328, + "logps_train/ref_chosen": -127.5, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -172.82156372070312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.5067338943481445, + "rewards_train/margins": 1.075422763824463, + "rewards_train/rejected": -5.582156658172607, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -13.700248718261719, + "logps_train/ref_chosen": -9.0, + "logps_train/ref_rejected": -25.5, + "logps_train/rejected": -52.06145477294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.47002488374710083, + "rewards_train/margins": 2.1861206889152527, + "rewards_train/rejected": -2.6561455726623535, + "step": 2468 + }, + { + "epoch": 0.69, + "logps_train/chosen": -66.0759048461914, + "logps_train/ref_chosen": -38.5, + "logps_train/ref_rejected": -17.875, + "logps_train/rejected": -30.750595092773438, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.7575905323028564, + "rewards_train/margins": -1.4700310230255127, + "rewards_train/rejected": -1.2875595092773438, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -221.85809326171875, + "logps_train/ref_chosen": -173.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -240.10824584960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.885809421539307, + "rewards_train/margins": 3.1250157356262207, + "rewards_train/rejected": -8.010825157165527, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -125.8727798461914, + "logps_train/ref_chosen": -102.5, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -166.5233612060547, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.337278127670288, + "rewards_train/margins": 2.115058183670044, + "rewards_train/rejected": -4.452336311340332, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -5.864541053771973, + "logps_train/ref_chosen": -2.90625, + "logps_train/ref_rejected": -1.953125, + "logps_train/rejected": -3.2458131313323975, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.2958291172981262, + "rewards_train/margins": -0.1665603071451187, + "rewards_train/rejected": -0.1292688101530075, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -21.043832778930664, + "logps_train/ref_chosen": -8.75, + "logps_train/ref_rejected": -21.625, + "logps_train/rejected": -60.05447006225586, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2293833494186401, + "rewards_train/margins": 2.613563656806946, + "rewards_train/rejected": -3.842947006225586, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -169.59457397460938, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -156.26040649414062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.059457540512085, + "rewards_train/margins": -2.3334168791770935, + "rewards_train/rejected": -0.7260406613349915, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -48.871299743652344, + "logps_train/ref_chosen": -43.5, + "logps_train/ref_rejected": -45.75, + "logps_train/rejected": -66.15129852294922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5371299982070923, + "rewards_train/margins": 1.5029999017715454, + "rewards_train/rejected": -2.0401298999786377, + "step": 2469 + }, + { + "epoch": 0.69, + "logps_train/chosen": -23.17182731628418, + "logps_train/ref_chosen": -5.53125, + "logps_train/ref_rejected": -3.5, + "logps_train/rejected": -24.689838409423828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7640577554702759, + "rewards_train/margins": 0.3549262285232544, + "rewards_train/rejected": -2.1189839839935303, + "step": 2469 + }, + { + "epoch": 0.69, + "learning_rate": 7.872778593728257e-10, + "loss": 0.4933, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -11.337181091308594, + "logps_train/ref_chosen": -1.2265625, + "logps_train/ref_rejected": -4.25, + "logps_train/rejected": -21.69860076904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0110619068145752, + "rewards_train/margins": 0.7337981462478638, + "rewards_train/rejected": -1.744860053062439, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -114.8406982421875, + "logps_train/ref_chosen": -79.5, + "logps_train/ref_rejected": -68.0, + "logps_train/rejected": -105.01245880126953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.534069776535034, + "rewards_train/margins": 0.1671762466430664, + "rewards_train/rejected": -3.7012460231781006, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -269.4510498046875, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -226.87335205078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.04510498046875, + "rewards_train/margins": 3.3922300338745117, + "rewards_train/rejected": -10.437335014343262, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -130.41993713378906, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -122.5, + "logps_train/rejected": -140.97657775878906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.74199378490448, + "rewards_train/margins": 0.10566401481628418, + "rewards_train/rejected": -1.8476577997207642, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -80.10465240478516, + "logps_train/ref_chosen": -46.25, + "logps_train/ref_rejected": -46.25, + "logps_train/rejected": -80.17752075195312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.385465383529663, + "rewards_train/margins": 0.007286787033081055, + "rewards_train/rejected": -3.392752170562744, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -141.92942810058594, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -138.10433959960938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.3929429054260254, + "rewards_train/margins": 2.267491340637207, + "rewards_train/rejected": -4.660434246063232, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -27.579544067382812, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -7.875, + "logps_train/rejected": -24.238676071166992, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4454544186592102, + "rewards_train/margins": 1.1909132599830627, + "rewards_train/rejected": -1.636367678642273, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -93.53083038330078, + "logps_train/ref_chosen": -63.5, + "logps_train/ref_rejected": -120.0, + "logps_train/rejected": -193.02589416503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0030829906463623, + "rewards_train/margins": 4.299506425857544, + "rewards_train/rejected": -7.302589416503906, + "step": 2470 + }, + { + "epoch": 0.69, + "logps_train/chosen": -18.358524322509766, + "logps_train/ref_chosen": -5.90625, + "logps_train/ref_rejected": -43.5, + "logps_train/rejected": -94.36577606201172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2452274560928345, + "rewards_train/margins": 3.8413504362106323, + "rewards_train/rejected": -5.086577892303467, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -45.62715148925781, + "logps_train/ref_chosen": -28.375, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -161.74508666992188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.725215196609497, + "rewards_train/margins": 4.34929347038269, + "rewards_train/rejected": -6.0745086669921875, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -0.22312931716442108, + "logps_train/ref_chosen": -0.6953125, + "logps_train/ref_rejected": -1.3046875, + "logps_train/rejected": -1.9037281274795532, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.04721831902861595, + "rewards_train/margins": 0.10712238401174545, + "rewards_train/rejected": -0.0599040649831295, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -137.8717803955078, + "logps_train/ref_chosen": -120.5, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -131.9722137451172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.737178087234497, + "rewards_train/margins": 0.1600433588027954, + "rewards_train/rejected": -1.8972214460372925, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -84.76133728027344, + "logps_train/ref_chosen": -75.0, + "logps_train/ref_rejected": -93.0, + "logps_train/rejected": -130.7648468017578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9761337637901306, + "rewards_train/margins": 2.8003509640693665, + "rewards_train/rejected": -3.776484727859497, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -28.779558181762695, + "logps_train/ref_chosen": -14.5, + "logps_train/ref_rejected": -62.5, + "logps_train/rejected": -72.87677001953125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.4279558658599854, + "rewards_train/margins": -0.39027881622314453, + "rewards_train/rejected": -1.0376770496368408, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -216.0941619873047, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -173.0, + "logps_train/rejected": -281.2607116699219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.409416198730469, + "rewards_train/margins": 3.4166555404663086, + "rewards_train/rejected": -10.826071739196777, + "step": 2471 + }, + { + "epoch": 0.69, + "logps_train/chosen": -116.91466522216797, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -7.375, + "logps_train/rejected": -28.127113342285156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.3914666175842285, + "rewards_train/margins": -0.3162553310394287, + "rewards_train/rejected": -2.0752112865448, + "step": 2471 + }, + { + "epoch": 0.69, + "learning_rate": 6.858180910237488e-10, + "loss": 0.3692, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -87.28860473632812, + "logps_train/ref_chosen": -79.0, + "logps_train/ref_rejected": -79.0, + "logps_train/rejected": -87.53005981445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8288604617118835, + "rewards_train/margins": 0.024145543575286865, + "rewards_train/rejected": -0.8530060052871704, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -19.581239700317383, + "logps_train/ref_chosen": -6.40625, + "logps_train/ref_rejected": -17.125, + "logps_train/rejected": -25.160518646240234, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.317499041557312, + "rewards_train/margins": -0.5139471888542175, + "rewards_train/rejected": -0.8035518527030945, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -19.26683235168457, + "logps_train/ref_chosen": -10.3125, + "logps_train/ref_rejected": -10.0, + "logps_train/rejected": -12.287357330322266, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.895433247089386, + "rewards_train/margins": -0.6666975170373917, + "rewards_train/rejected": -0.22873573005199432, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -112.55445098876953, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -91.5, + "logps_train/rejected": -123.24698638916016, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.8054451942443848, + "rewards_train/margins": 0.36925339698791504, + "rewards_train/rejected": -3.1746985912323, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -22.852523803710938, + "logps_train/ref_chosen": -6.875, + "logps_train/ref_rejected": -127.5, + "logps_train/rejected": -143.58892822265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5977524518966675, + "rewards_train/margins": 0.01114034652709961, + "rewards_train/rejected": -1.608892798423767, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -27.182449340820312, + "logps_train/ref_chosen": -15.5625, + "logps_train/ref_rejected": -2.109375, + "logps_train/rejected": -19.61908721923828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1619949340820312, + "rewards_train/margins": 0.588976263999939, + "rewards_train/rejected": -1.7509711980819702, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -6.535715103149414, + "logps_train/ref_chosen": -5.03125, + "logps_train/ref_rejected": -5.875, + "logps_train/rejected": -32.49979019165039, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.15044651925563812, + "rewards_train/margins": 2.5120326429605484, + "rewards_train/rejected": -2.6624791622161865, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -12.326095581054688, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -22.0, + "logps_train/rejected": -28.37451171875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.9294845461845398, + "rewards_train/margins": -0.2920333743095398, + "rewards_train/rejected": -0.637451171875, + "step": 2472 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.4237596988677979, + "logps_train/ref_chosen": -0.4375, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -11.377385139465332, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09862597286701202, + "rewards_train/margins": 0.5953625291585922, + "rewards_train/rejected": -0.6939885020256042, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -75.24864196777344, + "logps_train/ref_chosen": -66.0, + "logps_train/ref_rejected": -27.25, + "logps_train/rejected": -90.16569519042969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9248642325401306, + "rewards_train/margins": 5.366705477237701, + "rewards_train/rejected": -6.291569709777832, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -26.72098159790039, + "logps_train/ref_chosen": -16.375, + "logps_train/ref_rejected": -5.71875, + "logps_train/rejected": -38.42469024658203, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0345982313156128, + "rewards_train/margins": 2.235995888710022, + "rewards_train/rejected": -3.2705941200256348, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -7.57305908203125, + "logps_train/ref_chosen": -5.28125, + "logps_train/ref_rejected": -5.625, + "logps_train/rejected": -21.817832946777344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.22918091714382172, + "rewards_train/margins": 1.3901024013757706, + "rewards_train/rejected": -1.6192833185195923, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -36.709434509277344, + "logps_train/ref_chosen": -25.125, + "logps_train/ref_rejected": -11.8125, + "logps_train/rejected": -41.142852783203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1584434509277344, + "rewards_train/margins": 1.7745919227600098, + "rewards_train/rejected": -2.933035373687744, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -18.134632110595703, + "logps_train/ref_chosen": -14.5625, + "logps_train/ref_rejected": -25.125, + "logps_train/rejected": -32.801334381103516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.35721322894096375, + "rewards_train/margins": 0.4104202091693878, + "rewards_train/rejected": -0.7676334381103516, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -28.324996948242188, + "logps_train/ref_chosen": -12.125, + "logps_train/ref_rejected": -14.375, + "logps_train/rejected": -38.242218017578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6199997663497925, + "rewards_train/margins": 0.7667220830917358, + "rewards_train/rejected": -2.3867218494415283, + "step": 2473 + }, + { + "epoch": 0.69, + "logps_train/chosen": -94.46810150146484, + "logps_train/ref_chosen": -93.0, + "logps_train/ref_rejected": -36.75, + "logps_train/rejected": -53.159095764160156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.1468101590871811, + "rewards_train/margins": 1.4940993934869766, + "rewards_train/rejected": -1.6409095525741577, + "step": 2473 + }, + { + "epoch": 0.69, + "learning_rate": 5.91352462434247e-10, + "loss": 0.4589, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -107.30582427978516, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -10.125, + "logps_train/rejected": -25.951026916503906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.5305824279785156, + "rewards_train/margins": -1.9479796886444092, + "rewards_train/rejected": -1.5826027393341064, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -14.776016235351562, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -2.484375, + "logps_train/rejected": -15.550471305847168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.21510162949562073, + "rewards_train/margins": 1.091508001089096, + "rewards_train/rejected": -1.3066096305847168, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -74.77458190917969, + "logps_train/ref_chosen": -52.25, + "logps_train/ref_rejected": -33.5, + "logps_train/rejected": -57.86124801635742, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.252458333969116, + "rewards_train/margins": 0.18366646766662598, + "rewards_train/rejected": -2.436124801635742, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -144.1285858154297, + "logps_train/ref_chosen": -134.0, + "logps_train/ref_rejected": -195.0, + "logps_train/rejected": -257.0349426269531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0128586292266846, + "rewards_train/margins": 5.190635919570923, + "rewards_train/rejected": -6.203494548797607, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -40.88136291503906, + "logps_train/ref_chosen": -13.5, + "logps_train/ref_rejected": -9.125, + "logps_train/rejected": -40.44090270996094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7381362915039062, + "rewards_train/margins": 0.39345407485961914, + "rewards_train/rejected": -3.1315903663635254, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -20.505359649658203, + "logps_train/ref_chosen": -3.484375, + "logps_train/ref_rejected": -5.5, + "logps_train/rejected": -31.63971710205078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7020984888076782, + "rewards_train/margins": 0.9118732213973999, + "rewards_train/rejected": -2.613971710205078, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -234.93994140625, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -154.31069946289062, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.893994331359863, + "rewards_train/margins": -1.2629244327545166, + "rewards_train/rejected": -3.6310698986053467, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -107.53766632080078, + "logps_train/ref_chosen": -89.5, + "logps_train/ref_rejected": -24.625, + "logps_train/rejected": -50.13302993774414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8037666082382202, + "rewards_train/margins": 0.747036337852478, + "rewards_train/rejected": -2.5508029460906982, + "step": 2474 + }, + { + "epoch": 0.69, + "logps_train/chosen": -3.8639683723449707, + "logps_train/ref_chosen": -3.78125, + "logps_train/ref_rejected": -1.9140625, + "logps_train/rejected": -4.044101238250732, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0082718376070261, + "rewards_train/margins": 0.20473203621804714, + "rewards_train/rejected": -0.21300387382507324, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -13.607210159301758, + "logps_train/ref_chosen": -7.90625, + "logps_train/ref_rejected": -34.0, + "logps_train/rejected": -54.227413177490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5700960159301758, + "rewards_train/margins": 1.4526453018188477, + "rewards_train/rejected": -2.0227413177490234, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -170.4505615234375, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -100.0, + "logps_train/rejected": -152.97567749023438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.24505615234375, + "rewards_train/margins": 1.0525116920471191, + "rewards_train/rejected": -5.297567844390869, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -41.87616729736328, + "logps_train/ref_chosen": -29.625, + "logps_train/ref_rejected": -27.5, + "logps_train/rejected": -89.63477325439453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2251167297363281, + "rewards_train/margins": 4.98836088180542, + "rewards_train/rejected": -6.213477611541748, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -123.77352142333984, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -171.84906005859375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6773521900177002, + "rewards_train/margins": 3.757553815841675, + "rewards_train/rejected": -5.434906005859375, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -101.9032974243164, + "logps_train/ref_chosen": -77.5, + "logps_train/ref_rejected": -109.0, + "logps_train/rejected": -158.58941650390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4403297901153564, + "rewards_train/margins": 2.5186121463775635, + "rewards_train/rejected": -4.95894193649292, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -22.236703872680664, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -14.4375, + "logps_train/rejected": -16.84708023071289, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.24867038428783417, + "rewards_train/margins": -0.007712364196777344, + "rewards_train/rejected": -0.24095802009105682, + "step": 2475 + }, + { + "epoch": 0.69, + "logps_train/chosen": -146.1831817626953, + "logps_train/ref_chosen": -133.0, + "logps_train/ref_rejected": -123.5, + "logps_train/rejected": -179.85049438476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.318318247795105, + "rewards_train/margins": 4.316731095314026, + "rewards_train/rejected": -5.635049343109131, + "step": 2475 + }, + { + "epoch": 0.69, + "learning_rate": 5.038816347636698e-10, + "loss": 0.4778, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -118.80902862548828, + "logps_train/ref_chosen": -107.0, + "logps_train/ref_rejected": -145.0, + "logps_train/rejected": -174.70753479003906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1809028387069702, + "rewards_train/margins": 1.7898505926132202, + "rewards_train/rejected": -2.9707534313201904, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -148.8924560546875, + "logps_train/ref_chosen": -147.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -217.1815948486328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18924561142921448, + "rewards_train/margins": 6.228913873434067, + "rewards_train/rejected": -6.418159484863281, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -188.11521911621094, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -149.59593200683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -6.011521816253662, + "rewards_train/margins": 0.5480713844299316, + "rewards_train/rejected": -6.559593200683594, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -20.74810791015625, + "logps_train/ref_chosen": -6.9375, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -66.5595932006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3810608386993408, + "rewards_train/margins": 3.249898672103882, + "rewards_train/rejected": -4.630959510803223, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -45.77628707885742, + "logps_train/ref_chosen": -30.25, + "logps_train/ref_rejected": -6.46875, + "logps_train/rejected": -39.19173049926758, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.552628755569458, + "rewards_train/margins": 1.7196693420410156, + "rewards_train/rejected": -3.2722980976104736, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -91.43936157226562, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -240.47360229492188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.293936252593994, + "rewards_train/margins": 9.353424549102783, + "rewards_train/rejected": -11.647360801696777, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -39.831459045410156, + "logps_train/ref_chosen": -13.375, + "logps_train/ref_rejected": -21.875, + "logps_train/rejected": -64.9566879272461, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6456458568573, + "rewards_train/margins": 1.6625230312347412, + "rewards_train/rejected": -4.308168888092041, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -79.93386840820312, + "logps_train/ref_chosen": -64.5, + "logps_train/ref_rejected": -78.5, + "logps_train/rejected": -111.68324279785156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5433868169784546, + "rewards_train/margins": 1.7749375104904175, + "rewards_train/rejected": -3.318324327468872, + "step": 2476 + }, + { + "epoch": 0.69, + "logps_train/chosen": -45.1328239440918, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -14.9375, + "logps_train/rejected": -33.54096603393555, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.5757824182510376, + "rewards_train/margins": 0.2845642566680908, + "rewards_train/rejected": -1.8603466749191284, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -125.66960144042969, + "logps_train/ref_chosen": -104.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -207.73037719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.1669602394104004, + "rewards_train/margins": 4.206077575683594, + "rewards_train/rejected": -6.373037815093994, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -41.65425109863281, + "logps_train/ref_chosen": -33.5, + "logps_train/ref_rejected": -50.75, + "logps_train/rejected": -97.66920471191406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8154250979423523, + "rewards_train/margins": 3.876495659351349, + "rewards_train/rejected": -4.691920757293701, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -208.87298583984375, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -222.54287719726562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.787298679351807, + "rewards_train/margins": 1.0669889450073242, + "rewards_train/rejected": -6.854287624359131, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -5.2717814445495605, + "logps_train/ref_chosen": -6.5625, + "logps_train/ref_rejected": -26.875, + "logps_train/rejected": -89.78187561035156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.12907186150550842, + "rewards_train/margins": 6.419759422540665, + "rewards_train/rejected": -6.290687561035156, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -33.18170166015625, + "logps_train/ref_chosen": -20.375, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -35.31053161621094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.280670166015625, + "rewards_train/margins": 0.16288304328918457, + "rewards_train/rejected": -1.4435532093048096, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -83.66770935058594, + "logps_train/ref_chosen": -59.25, + "logps_train/ref_rejected": -63.5, + "logps_train/rejected": -112.75470733642578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4417710304260254, + "rewards_train/margins": 2.4836997985839844, + "rewards_train/rejected": -4.92547082901001, + "step": 2477 + }, + { + "epoch": 0.69, + "logps_train/chosen": -36.0234489440918, + "logps_train/ref_chosen": -10.1875, + "logps_train/ref_rejected": -90.0, + "logps_train/rejected": -115.98045349121094, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.583595037460327, + "rewards_train/margins": 0.014450311660766602, + "rewards_train/rejected": -2.5980453491210938, + "step": 2477 + }, + { + "epoch": 0.69, + "learning_rate": 4.234062202149724e-10, + "loss": 0.2144, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -175.04983520507812, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -181.36624145507812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.804983615875244, + "rewards_train/margins": 0.6316404342651367, + "rewards_train/rejected": -6.436624050140381, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -71.61005401611328, + "logps_train/ref_chosen": -42.25, + "logps_train/ref_rejected": -15.5625, + "logps_train/rejected": -28.203529357910156, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.9360053539276123, + "rewards_train/margins": -1.6719024181365967, + "rewards_train/rejected": -1.2641029357910156, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -130.77951049804688, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -159.55226135253906, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.5279510021209717, + "rewards_train/margins": -0.17272472381591797, + "rewards_train/rejected": -2.3552262783050537, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -180.21658325195312, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -182.0, + "logps_train/rejected": -268.16510009765625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1216583251953125, + "rewards_train/margins": 7.494852066040039, + "rewards_train/rejected": -8.616510391235352, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -20.042728424072266, + "logps_train/ref_chosen": -2.8125, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -15.741260528564453, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7230228185653687, + "rewards_train/margins": -0.5051467418670654, + "rewards_train/rejected": -1.2178760766983032, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -86.03434753417969, + "logps_train/ref_chosen": -68.5, + "logps_train/ref_rejected": -120.5, + "logps_train/rejected": -241.96380615234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7534347772598267, + "rewards_train/margins": 10.392945647239685, + "rewards_train/rejected": -12.146380424499512, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.4395005702972412, + "logps_train/ref_chosen": -0.33984375, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -4.037816524505615, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.10996568202972412, + "rewards_train/margins": -0.3155590295791626, + "rewards_train/rejected": 0.20559334754943848, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -10.560017585754395, + "logps_train/ref_chosen": -9.4375, + "logps_train/ref_rejected": -27.125, + "logps_train/rejected": -51.29235076904297, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.11225175857543945, + "rewards_train/margins": 2.304483413696289, + "rewards_train/rejected": -2.4167351722717285, + "step": 2478 + }, + { + "epoch": 0.69, + "logps_train/chosen": -171.83807373046875, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -168.75222778320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.483807325363159, + "rewards_train/margins": 4.041415452957153, + "rewards_train/rejected": -7.5252227783203125, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -41.125064849853516, + "logps_train/ref_chosen": -18.375, + "logps_train/ref_rejected": -31.5, + "logps_train/rejected": -70.74088287353516, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2750065326690674, + "rewards_train/margins": 1.6490817070007324, + "rewards_train/rejected": -3.9240882396698, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -276.4113464355469, + "logps_train/ref_chosen": -203.0, + "logps_train/ref_rejected": -142.0, + "logps_train/rejected": -183.38714599609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.341134548187256, + "rewards_train/margins": -3.2024197578430176, + "rewards_train/rejected": -4.138714790344238, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -26.681583404541016, + "logps_train/ref_chosen": -15.625, + "logps_train/ref_rejected": -27.875, + "logps_train/rejected": -82.05042266845703, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1056584119796753, + "rewards_train/margins": 4.311884045600891, + "rewards_train/rejected": -5.417542457580566, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.7893273830413818, + "logps_train/ref_chosen": -0.6875, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -3.701716661453247, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.1101827397942543, + "rewards_train/margins": -0.0759485736489296, + "rewards_train/rejected": -0.03423416614532471, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -90.59461975097656, + "logps_train/ref_chosen": -66.5, + "logps_train/ref_rejected": -82.5, + "logps_train/rejected": -142.13412475585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4094619750976562, + "rewards_train/margins": 3.553950786590576, + "rewards_train/rejected": -5.963412761688232, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -115.69068908691406, + "logps_train/ref_chosen": -86.0, + "logps_train/ref_rejected": -96.5, + "logps_train/rejected": -138.66506958007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.969069004058838, + "rewards_train/margins": 1.2474379539489746, + "rewards_train/rejected": -4.2165069580078125, + "step": 2479 + }, + { + "epoch": 0.69, + "logps_train/chosen": -100.30599975585938, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -70.0, + "logps_train/rejected": -113.91329193115234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8306000232696533, + "rewards_train/margins": 2.5607292652130127, + "rewards_train/rejected": -4.391329288482666, + "step": 2479 + }, + { + "epoch": 0.69, + "learning_rate": 3.4992678203071835e-10, + "loss": 0.5951, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -9.870445251464844, + "logps_train/ref_chosen": -5.21875, + "logps_train/ref_rejected": -7.9375, + "logps_train/rejected": -25.793781280517578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4651695191860199, + "rewards_train/margins": 1.3204586803913116, + "rewards_train/rejected": -1.7856281995773315, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -139.6914825439453, + "logps_train/ref_chosen": -106.5, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -220.42922973632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.319148302078247, + "rewards_train/margins": 5.123775243759155, + "rewards_train/rejected": -8.442923545837402, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -81.66597747802734, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -37.75, + "logps_train/rejected": -73.82766723632812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7665977478027344, + "rewards_train/margins": 2.8411691188812256, + "rewards_train/rejected": -3.60776686668396, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -162.4893035888672, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -124.0, + "logps_train/rejected": -211.95159912109375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.448930501937866, + "rewards_train/margins": 5.346229791641235, + "rewards_train/rejected": -8.795160293579102, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -120.81360626220703, + "logps_train/ref_chosen": -112.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -175.33023071289062, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.881360650062561, + "rewards_train/margins": 5.001662611961365, + "rewards_train/rejected": -5.883023262023926, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -157.6169891357422, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -185.0264892578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8616989850997925, + "rewards_train/margins": 3.840950131416321, + "rewards_train/rejected": -5.702649116516113, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -168.25535583496094, + "logps_train/ref_chosen": -142.0, + "logps_train/ref_rejected": -146.0, + "logps_train/rejected": -228.64724731445312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.625535726547241, + "rewards_train/margins": 5.639189004898071, + "rewards_train/rejected": -8.264724731445312, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -139.9935302734375, + "logps_train/ref_chosen": -116.5, + "logps_train/ref_rejected": -106.0, + "logps_train/rejected": -214.62850952148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.349353075027466, + "rewards_train/margins": 8.513498067855835, + "rewards_train/rejected": -10.8628511428833, + "step": 2480 + }, + { + "epoch": 0.69, + "logps_train/chosen": -155.19479370117188, + "logps_train/ref_chosen": -149.0, + "logps_train/ref_rejected": -185.0, + "logps_train/rejected": -239.30491638183594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6194793581962585, + "rewards_train/margins": 4.81101256608963, + "rewards_train/rejected": -5.430491924285889, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -40.73228454589844, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -19.75, + "logps_train/rejected": -54.76879119873047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.773228406906128, + "rewards_train/margins": 0.7286508083343506, + "rewards_train/rejected": -3.5018792152404785, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -127.96053314208984, + "logps_train/ref_chosen": -101.5, + "logps_train/ref_rejected": -115.0, + "logps_train/rejected": -191.48162841796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6460533142089844, + "rewards_train/margins": 5.002109527587891, + "rewards_train/rejected": -7.648162841796875, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -35.73271179199219, + "logps_train/ref_chosen": -23.375, + "logps_train/ref_rejected": -4.46875, + "logps_train/rejected": -44.739990234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2357711791992188, + "rewards_train/margins": 2.7913527488708496, + "rewards_train/rejected": -4.027123928070068, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -13.546161651611328, + "logps_train/ref_chosen": -5.75, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -25.06723403930664, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7796161770820618, + "rewards_train/margins": 0.402107298374176, + "rewards_train/rejected": -1.1817234754562378, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -97.60200500488281, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -97.0, + "logps_train/rejected": -121.67353057861328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5102005004882812, + "rewards_train/margins": 1.9571526050567627, + "rewards_train/rejected": -2.467353105545044, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -17.257884979248047, + "logps_train/ref_chosen": -8.1875, + "logps_train/ref_rejected": -8.9375, + "logps_train/rejected": -33.99916076660156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9070385098457336, + "rewards_train/margins": 1.59912770986557, + "rewards_train/rejected": -2.5061662197113037, + "step": 2481 + }, + { + "epoch": 0.69, + "logps_train/chosen": -8.240997314453125, + "logps_train/ref_chosen": -2.0, + "logps_train/ref_rejected": -5.53125, + "logps_train/rejected": -32.69272994995117, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6240997314453125, + "rewards_train/margins": 2.092048406600952, + "rewards_train/rejected": -2.7161481380462646, + "step": 2481 + }, + { + "epoch": 0.69, + "learning_rate": 2.834438344891943e-10, + "loss": 0.1093, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -148.3949432373047, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -147.81365966796875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.7394943237304688, + "rewards_train/margins": -0.05812835693359375, + "rewards_train/rejected": -3.681365966796875, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -17.456695556640625, + "logps_train/ref_chosen": -3.421875, + "logps_train/ref_rejected": -8.75, + "logps_train/rejected": -31.118057250976562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.4034820795059204, + "rewards_train/margins": 0.83332359790802, + "rewards_train/rejected": -2.2368056774139404, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -105.7276840209961, + "logps_train/ref_chosen": -88.0, + "logps_train/ref_rejected": -108.0, + "logps_train/rejected": -126.64896392822266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7727683782577515, + "rewards_train/margins": 0.09212803840637207, + "rewards_train/rejected": -1.8648964166641235, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -2.897244453430176, + "logps_train/ref_chosen": -0.8515625, + "logps_train/ref_rejected": -0.8515625, + "logps_train/rejected": -2.844444990158081, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.20456819236278534, + "rewards_train/margins": -0.005279943346977234, + "rewards_train/rejected": -0.1992882490158081, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -22.927047729492188, + "logps_train/ref_chosen": -5.9375, + "logps_train/ref_rejected": -8.1875, + "logps_train/rejected": -26.30692481994629, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6989548206329346, + "rewards_train/margins": 0.11298763751983643, + "rewards_train/rejected": -1.811942458152771, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -163.04721069335938, + "logps_train/ref_chosen": -123.0, + "logps_train/ref_rejected": -73.5, + "logps_train/rejected": -106.39143371582031, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.004721164703369, + "rewards_train/margins": -0.7155778408050537, + "rewards_train/rejected": -3.2891433238983154, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -92.63744354248047, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -83.0, + "logps_train/rejected": -87.09858703613281, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.8137443661689758, + "rewards_train/margins": -0.4038856625556946, + "rewards_train/rejected": -0.40985870361328125, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -21.415935516357422, + "logps_train/ref_chosen": -20.5, + "logps_train/ref_rejected": -12.625, + "logps_train/rejected": -44.586212158203125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09159355610609055, + "rewards_train/margins": 3.104527659714222, + "rewards_train/rejected": -3.1961212158203125, + "step": 2482 + }, + { + "epoch": 0.69, + "logps_train/chosen": -28.855518341064453, + "logps_train/ref_chosen": -11.0625, + "logps_train/ref_rejected": -4.4375, + "logps_train/rejected": -13.939717292785645, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.7793018817901611, + "rewards_train/margins": -0.8290801644325256, + "rewards_train/rejected": -0.9502217173576355, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -150.9730682373047, + "logps_train/ref_chosen": -116.0, + "logps_train/ref_rejected": -98.5, + "logps_train/rejected": -132.82110595703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.4973068237304688, + "rewards_train/margins": -0.06519627571105957, + "rewards_train/rejected": -3.432110548019409, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -38.04768371582031, + "logps_train/ref_chosen": -15.5, + "logps_train/ref_rejected": -15.5, + "logps_train/rejected": -38.28352355957031, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.2547683715820312, + "rewards_train/margins": 0.02358412742614746, + "rewards_train/rejected": -2.2783524990081787, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -37.493370056152344, + "logps_train/ref_chosen": -10.9375, + "logps_train/ref_rejected": -7.125, + "logps_train/rejected": -49.1146125793457, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6555869579315186, + "rewards_train/margins": 1.5433743000030518, + "rewards_train/rejected": -4.19896125793457, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -13.324596405029297, + "logps_train/ref_chosen": -7.21875, + "logps_train/ref_rejected": -3.359375, + "logps_train/rejected": -12.540706634521484, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6105846762657166, + "rewards_train/margins": 0.30754852294921875, + "rewards_train/rejected": -0.9181331992149353, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -83.31034851074219, + "logps_train/ref_chosen": -75.5, + "logps_train/ref_rejected": -84.0, + "logps_train/rejected": -97.20061492919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7810348868370056, + "rewards_train/margins": 0.53902667760849, + "rewards_train/rejected": -1.3200615644454956, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -70.98506927490234, + "logps_train/ref_chosen": -53.0, + "logps_train/ref_rejected": -36.0, + "logps_train/rejected": -76.69575500488281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7985069751739502, + "rewards_train/margins": 2.271068811416626, + "rewards_train/rejected": -4.069575786590576, + "step": 2483 + }, + { + "epoch": 0.69, + "logps_train/chosen": -10.955662727355957, + "logps_train/ref_chosen": -9.125, + "logps_train/ref_rejected": -1.546875, + "logps_train/rejected": -11.352747917175293, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.18306627869606018, + "rewards_train/margins": 0.7975210249423981, + "rewards_train/rejected": -0.9805873036384583, + "step": 2483 + }, + { + "epoch": 0.69, + "learning_rate": 2.2395784290041298e-10, + "loss": 0.5882, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -88.57198333740234, + "logps_train/ref_chosen": -72.0, + "logps_train/ref_rejected": -644.0, + "logps_train/rejected": -692.01953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6571983098983765, + "rewards_train/margins": 3.144755005836487, + "rewards_train/rejected": -4.801953315734863, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.3228521347045898, + "logps_train/ref_chosen": -0.3515625, + "logps_train/ref_rejected": -20.875, + "logps_train/rejected": -28.543153762817383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.0971289649605751, + "rewards_train/margins": 0.6696863994002342, + "rewards_train/rejected": -0.7668153643608093, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -1.9414228200912476, + "logps_train/ref_chosen": -0.953125, + "logps_train/ref_rejected": -3.046875, + "logps_train/rejected": -5.747804641723633, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09882978349924088, + "rewards_train/margins": 0.1712631806731224, + "rewards_train/rejected": -0.2700929641723633, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -24.109819412231445, + "logps_train/ref_chosen": -4.21875, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -47.91957092285156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9891070127487183, + "rewards_train/margins": 1.4903501272201538, + "rewards_train/rejected": -3.479457139968872, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -18.00433349609375, + "logps_train/ref_chosen": -17.875, + "logps_train/ref_rejected": -16.375, + "logps_train/rejected": -34.94110870361328, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.012933350168168545, + "rewards_train/margins": 1.8436775440350175, + "rewards_train/rejected": -1.856610894203186, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -185.4228515625, + "logps_train/ref_chosen": -150.0, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -198.7915802001953, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.542285203933716, + "rewards_train/margins": 3.8368728160858154, + "rewards_train/rejected": -7.379158020019531, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -75.07077026367188, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -65.0, + "logps_train/rejected": -93.49774169921875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8070770502090454, + "rewards_train/margins": 2.0426970720291138, + "rewards_train/rejected": -2.849774122238159, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -236.40103149414062, + "logps_train/ref_chosen": -186.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -181.3624267578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.040103435516357, + "rewards_train/margins": 1.4461393356323242, + "rewards_train/rejected": -6.486242771148682, + "step": 2484 + }, + { + "epoch": 0.69, + "logps_train/chosen": -49.12687301635742, + "logps_train/ref_chosen": -43.25, + "logps_train/ref_rejected": -39.0, + "logps_train/rejected": -75.03216552734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5876873135566711, + "rewards_train/margins": 3.0155293345451355, + "rewards_train/rejected": -3.6032166481018066, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -148.9866943359375, + "logps_train/ref_chosen": -105.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -201.33990478515625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.398669719696045, + "rewards_train/margins": 5.835320949554443, + "rewards_train/rejected": -10.233990669250488, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -65.48100280761719, + "logps_train/ref_chosen": -48.25, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -59.24176788330078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7231003046035767, + "rewards_train/margins": 2.88857638835907, + "rewards_train/rejected": -4.6116766929626465, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -140.150634765625, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -149.0, + "logps_train/rejected": -230.10470581054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7150635719299316, + "rewards_train/margins": 5.395407199859619, + "rewards_train/rejected": -8.11047077178955, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -120.91455078125, + "logps_train/ref_chosen": -90.5, + "logps_train/ref_rejected": -19.0, + "logps_train/rejected": -57.31578063964844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.041455030441284, + "rewards_train/margins": 0.7901229858398438, + "rewards_train/rejected": -3.831578016281128, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -60.46275329589844, + "logps_train/ref_chosen": -62.25, + "logps_train/ref_rejected": -62.25, + "logps_train/rejected": -60.706138610839844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.17872467637062073, + "rewards_train/margins": 0.024338528513908386, + "rewards_train/rejected": 0.15438614785671234, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -41.58935546875, + "logps_train/ref_chosen": -12.625, + "logps_train/ref_rejected": -9.0625, + "logps_train/rejected": -46.4133415222168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.896435499191284, + "rewards_train/margins": 0.838648796081543, + "rewards_train/rejected": -3.735084295272827, + "step": 2485 + }, + { + "epoch": 0.69, + "logps_train/chosen": -22.9931697845459, + "logps_train/ref_chosen": -10.625, + "logps_train/ref_rejected": -20.625, + "logps_train/rejected": -48.49400329589844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.2368170022964478, + "rewards_train/margins": 1.5500832796096802, + "rewards_train/rejected": -2.786900281906128, + "step": 2485 + }, + { + "epoch": 0.69, + "learning_rate": 1.7146922360322668e-10, + "loss": 0.218, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -152.214111328125, + "logps_train/ref_chosen": -121.0, + "logps_train/ref_rejected": -160.0, + "logps_train/rejected": -221.12240600585938, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.121411085128784, + "rewards_train/margins": 2.9908297061920166, + "rewards_train/rejected": -6.112240791320801, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -108.0610580444336, + "logps_train/ref_chosen": -105.5, + "logps_train/ref_rejected": -221.0, + "logps_train/rejected": -341.1913146972656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25610581040382385, + "rewards_train/margins": 11.763025850057602, + "rewards_train/rejected": -12.019131660461426, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -88.70064544677734, + "logps_train/ref_chosen": -55.25, + "logps_train/ref_rejected": -47.75, + "logps_train/rejected": -69.8114013671875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.345064640045166, + "rewards_train/margins": -1.1389243602752686, + "rewards_train/rejected": -2.2061402797698975, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -14.544315338134766, + "logps_train/ref_chosen": -2.84375, + "logps_train/ref_rejected": -15.25, + "logps_train/rejected": -19.291980743408203, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1700565814971924, + "rewards_train/margins": -0.7658585011959076, + "rewards_train/rejected": -0.4041980803012848, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -79.79568481445312, + "logps_train/ref_chosen": -48.0, + "logps_train/ref_rejected": -39.25, + "logps_train/rejected": -47.780029296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.1795685291290283, + "rewards_train/margins": -2.3265655636787415, + "rewards_train/rejected": -0.8530029654502869, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -23.31499481201172, + "logps_train/ref_chosen": -19.25, + "logps_train/ref_rejected": -19.5, + "logps_train/rejected": -40.73545455932617, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4064994752407074, + "rewards_train/margins": 1.717045933008194, + "rewards_train/rejected": -2.1235454082489014, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -213.9556884765625, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -129.0, + "logps_train/rejected": -185.0718536376953, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -5.89556884765625, + "rewards_train/margins": -0.28838348388671875, + "rewards_train/rejected": -5.607185363769531, + "step": 2486 + }, + { + "epoch": 0.69, + "logps_train/chosen": -184.84732055664062, + "logps_train/ref_chosen": -154.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -215.51028442382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0847320556640625, + "rewards_train/margins": 4.066296577453613, + "rewards_train/rejected": -7.151028633117676, + "step": 2486 + }, + { + "epoch": 0.7, + "logps_train/chosen": -107.662353515625, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -151.1078338623047, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.566235303878784, + "rewards_train/margins": 3.1945483684539795, + "rewards_train/rejected": -5.760783672332764, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -144.37747192382812, + "logps_train/ref_chosen": -113.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -162.36363220214844, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.137747287750244, + "rewards_train/margins": 2.4486160278320312, + "rewards_train/rejected": -5.586363315582275, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -45.14297103881836, + "logps_train/ref_chosen": -13.1875, + "logps_train/ref_rejected": -14.0625, + "logps_train/rejected": -51.38471984863281, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.195547103881836, + "rewards_train/margins": 0.536674976348877, + "rewards_train/rejected": -3.732222080230713, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -127.8021469116211, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -242.5194091796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.4302146434783936, + "rewards_train/margins": 7.42172646522522, + "rewards_train/rejected": -9.851941108703613, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -108.75225830078125, + "logps_train/ref_chosen": -83.5, + "logps_train/ref_rejected": -94.0, + "logps_train/rejected": -186.81027221679688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.525225877761841, + "rewards_train/margins": 6.7558019161224365, + "rewards_train/rejected": -9.281027793884277, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -101.19654846191406, + "logps_train/ref_chosen": -74.0, + "logps_train/ref_rejected": -104.0, + "logps_train/rejected": -203.85833740234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.7196547985076904, + "rewards_train/margins": 7.266179323196411, + "rewards_train/rejected": -9.985834121704102, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -131.78662109375, + "logps_train/ref_chosen": -99.5, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -127.61367797851562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2286622524261475, + "rewards_train/margins": 2.4327056407928467, + "rewards_train/rejected": -5.661367893218994, + "step": 2487 + }, + { + "epoch": 0.7, + "logps_train/chosen": -30.849437713623047, + "logps_train/ref_chosen": -8.3125, + "logps_train/ref_rejected": -14.5, + "logps_train/rejected": -29.722734451293945, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.2536938190460205, + "rewards_train/margins": -0.7314203977584839, + "rewards_train/rejected": -1.5222734212875366, + "step": 2487 + }, + { + "epoch": 0.7, + "learning_rate": 1.2597834396255169e-10, + "loss": 0.4909, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -243.95797729492188, + "logps_train/ref_chosen": -103.0, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -268.8729553222656, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -14.095797538757324, + "rewards_train/margins": 0.34149837493896484, + "rewards_train/rejected": -14.437295913696289, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -63.31413269042969, + "logps_train/ref_chosen": -62.5, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -103.22773742675781, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08141326904296875, + "rewards_train/margins": 0.7413604855537415, + "rewards_train/rejected": -0.8227737545967102, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -2.061020612716675, + "logps_train/ref_chosen": -1.7578125, + "logps_train/ref_rejected": -13.25, + "logps_train/rejected": -24.97297477722168, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.03032081201672554, + "rewards_train/margins": 1.1419766657054424, + "rewards_train/rejected": -1.172297477722168, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -377.1597900390625, + "logps_train/ref_chosen": -274.0, + "logps_train/ref_rejected": -239.0, + "logps_train/rejected": -335.2073974609375, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.31597900390625, + "rewards_train/margins": -0.6952390670776367, + "rewards_train/rejected": -9.620739936828613, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -84.8458480834961, + "logps_train/ref_chosen": -91.0, + "logps_train/ref_rejected": -95.0, + "logps_train/rejected": -114.21012878417969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.6154152154922485, + "rewards_train/margins": 2.5364280939102173, + "rewards_train/rejected": -1.9210128784179688, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -173.1783447265625, + "logps_train/ref_chosen": -139.0, + "logps_train/ref_rejected": -161.0, + "logps_train/rejected": -217.211181640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.417834520339966, + "rewards_train/margins": 2.2032835483551025, + "rewards_train/rejected": -5.621118068695068, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -232.40225219726562, + "logps_train/ref_chosen": -162.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -245.20150756835938, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -7.040225505828857, + "rewards_train/margins": -1.5200748443603516, + "rewards_train/rejected": -5.520150661468506, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -159.51332092285156, + "logps_train/ref_chosen": -130.0, + "logps_train/ref_rejected": -111.0, + "logps_train/rejected": -182.6098175048828, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.9513320922851562, + "rewards_train/margins": 4.209649562835693, + "rewards_train/rejected": -7.16098165512085, + "step": 2488 + }, + { + "epoch": 0.7, + "logps_train/chosen": -61.06136703491211, + "logps_train/ref_chosen": -25.25, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -53.14242172241211, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.581136703491211, + "rewards_train/margins": -1.3543944358825684, + "rewards_train/rejected": -2.2267422676086426, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -148.66928100585938, + "logps_train/ref_chosen": -129.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -153.73695373535156, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9669281244277954, + "rewards_train/margins": 3.956767439842224, + "rewards_train/rejected": -5.9236955642700195, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -112.00763702392578, + "logps_train/ref_chosen": -71.0, + "logps_train/ref_rejected": -82.0, + "logps_train/rejected": -171.27590942382812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.10076379776001, + "rewards_train/margins": 4.826827526092529, + "rewards_train/rejected": -8.927591323852539, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -120.33512878417969, + "logps_train/ref_chosen": -84.5, + "logps_train/ref_rejected": -64.0, + "logps_train/rejected": -123.50608825683594, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.583513021469116, + "rewards_train/margins": 2.367095708847046, + "rewards_train/rejected": -5.950608730316162, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -24.625436782836914, + "logps_train/ref_chosen": -13.125, + "logps_train/ref_rejected": -24.75, + "logps_train/rejected": -42.78263854980469, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1500437259674072, + "rewards_train/margins": 0.6532201766967773, + "rewards_train/rejected": -1.8032639026641846, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -93.24010467529297, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -93.00130462646484, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.1740105152130127, + "rewards_train/margins": -0.0238800048828125, + "rewards_train/rejected": -1.1501305103302002, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -163.51052856445312, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -116.5, + "logps_train/rejected": -205.61578369140625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.2510528564453125, + "rewards_train/margins": 5.660525321960449, + "rewards_train/rejected": -8.911578178405762, + "step": 2489 + }, + { + "epoch": 0.7, + "logps_train/chosen": -230.71676635742188, + "logps_train/ref_chosen": -137.0, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -350.97882080078125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -9.371676445007324, + "rewards_train/margins": 10.326205253601074, + "rewards_train/rejected": -19.6978816986084, + "step": 2489 + }, + { + "epoch": 0.7, + "learning_rate": 8.748552236603757e-11, + "loss": 0.4403, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -138.25146484375, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -118.0, + "logps_train/rejected": -131.54364013671875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.725146472454071, + "rewards_train/margins": 0.6292175650596619, + "rewards_train/rejected": -1.354364037513733, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -196.36138916015625, + "logps_train/ref_chosen": -131.0, + "logps_train/ref_rejected": -99.0, + "logps_train/rejected": -162.2689971923828, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -6.536139011383057, + "rewards_train/margins": -0.20923900604248047, + "rewards_train/rejected": -6.326900005340576, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -2.8661229610443115, + "logps_train/ref_chosen": -4.4375, + "logps_train/ref_rejected": -3.5625, + "logps_train/rejected": -37.57761764526367, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1571377068758011, + "rewards_train/margins": 3.5586496144533157, + "rewards_train/rejected": -3.4015119075775146, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -125.28390502929688, + "logps_train/ref_chosen": -113.5, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -144.93362426757812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1783905029296875, + "rewards_train/margins": 2.6649720668792725, + "rewards_train/rejected": -3.84336256980896, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": 0.0, + "logps_train/ref_chosen": 0.0, + "logps_train/ref_rejected": -117.5, + "logps_train/rejected": -269.44195556640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.0, + "rewards_train/margins": 15.194195747375488, + "rewards_train/rejected": -15.194195747375488, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -9.002325057983398, + "logps_train/ref_chosen": -6.28125, + "logps_train/ref_rejected": -9.3125, + "logps_train/rejected": -16.81255340576172, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.2721075117588043, + "rewards_train/margins": 0.47789785265922546, + "rewards_train/rejected": -0.7500053644180298, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -46.450504302978516, + "logps_train/ref_chosen": -19.625, + "logps_train/ref_rejected": -14.6875, + "logps_train/rejected": -67.43338775634766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.6825504302978516, + "rewards_train/margins": 2.592038631439209, + "rewards_train/rejected": -5.2745890617370605, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -15.813414573669434, + "logps_train/ref_chosen": -15.0625, + "logps_train/ref_rejected": -14.3125, + "logps_train/rejected": -19.08009147644043, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.07509145885705948, + "rewards_train/margins": 0.4016677066683769, + "rewards_train/rejected": -0.4767591655254364, + "step": 2490 + }, + { + "epoch": 0.7, + "logps_train/chosen": -16.638328552246094, + "logps_train/ref_chosen": -2.6875, + "logps_train/ref_rejected": -1.703125, + "logps_train/rejected": -10.524141311645508, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.3950828313827515, + "rewards_train/margins": -0.5129811763763428, + "rewards_train/rejected": -0.8821016550064087, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -236.9080810546875, + "logps_train/ref_chosen": -223.0, + "logps_train/ref_rejected": -153.0, + "logps_train/rejected": -182.0280303955078, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.39080810546875, + "rewards_train/margins": 1.5119950771331787, + "rewards_train/rejected": -2.9028031826019287, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -155.07261657714844, + "logps_train/ref_chosen": -98.5, + "logps_train/ref_rejected": -91.0, + "logps_train/rejected": -208.6224365234375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.657261848449707, + "rewards_train/margins": 6.104982376098633, + "rewards_train/rejected": -11.76224422454834, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -31.575895309448242, + "logps_train/ref_chosen": -23.125, + "logps_train/ref_rejected": -25.625, + "logps_train/rejected": -39.81121826171875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8450895547866821, + "rewards_train/margins": 0.5735323429107666, + "rewards_train/rejected": -1.4186218976974487, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -38.696285247802734, + "logps_train/ref_chosen": -19.75, + "logps_train/ref_rejected": -9.25, + "logps_train/rejected": -28.57088851928711, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8946285247802734, + "rewards_train/margins": 0.0374603271484375, + "rewards_train/rejected": -1.932088851928711, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -15.594264030456543, + "logps_train/ref_chosen": -13.0, + "logps_train/ref_rejected": -30.875, + "logps_train/rejected": -69.40116119384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.25942641496658325, + "rewards_train/margins": 3.5931896567344666, + "rewards_train/rejected": -3.85261607170105, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -25.278614044189453, + "logps_train/ref_chosen": -15.0, + "logps_train/ref_rejected": -21.5, + "logps_train/rejected": -39.419803619384766, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.027861475944519, + "rewards_train/margins": 0.7641189098358154, + "rewards_train/rejected": -1.7919803857803345, + "step": 2491 + }, + { + "epoch": 0.7, + "logps_train/chosen": -19.58942985534668, + "logps_train/ref_chosen": -3.46875, + "logps_train/ref_rejected": -13.1875, + "logps_train/rejected": -53.88359451293945, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.6120680570602417, + "rewards_train/margins": 2.457541584968567, + "rewards_train/rejected": -4.069609642028809, + "step": 2491 + }, + { + "epoch": 0.7, + "learning_rate": 5.599102822273494e-11, + "loss": 0.3244, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -92.673095703125, + "logps_train/ref_chosen": -61.75, + "logps_train/ref_rejected": -42.75, + "logps_train/rejected": -70.88202667236328, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -3.0923097133636475, + "rewards_train/margins": -0.27910709381103516, + "rewards_train/rejected": -2.8132026195526123, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -124.7614517211914, + "logps_train/ref_chosen": -111.0, + "logps_train/ref_rejected": -113.0, + "logps_train/rejected": -160.91250610351562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3761452436447144, + "rewards_train/margins": 3.41510546207428, + "rewards_train/rejected": -4.791250705718994, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -129.8035888671875, + "logps_train/ref_chosen": -110.5, + "logps_train/ref_rejected": -154.0, + "logps_train/rejected": -209.83456420898438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.93035888671875, + "rewards_train/margins": 3.653097629547119, + "rewards_train/rejected": -5.583456516265869, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -245.1651611328125, + "logps_train/ref_chosen": -169.0, + "logps_train/ref_rejected": -132.0, + "logps_train/rejected": -209.2049560546875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -7.61651611328125, + "rewards_train/margins": 0.10397958755493164, + "rewards_train/rejected": -7.720495700836182, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -103.65916442871094, + "logps_train/ref_chosen": -81.0, + "logps_train/ref_rejected": -94.5, + "logps_train/rejected": -152.04840087890625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.265916585922241, + "rewards_train/margins": 3.4889237880706787, + "rewards_train/rejected": -5.75484037399292, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -147.44723510742188, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -141.0, + "logps_train/rejected": -197.19679260253906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.0447235107421875, + "rewards_train/margins": 2.574955940246582, + "rewards_train/rejected": -5.6196794509887695, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -151.63296508789062, + "logps_train/ref_chosen": -100.5, + "logps_train/ref_rejected": -125.0, + "logps_train/rejected": -211.31105041503906, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.1132965087890625, + "rewards_train/margins": 3.5178089141845703, + "rewards_train/rejected": -8.631105422973633, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -10.352779388427734, + "logps_train/ref_chosen": -5.5, + "logps_train/ref_rejected": -1.0234375, + "logps_train/rejected": -5.802338600158691, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.4852779507637024, + "rewards_train/margins": -0.0073878467082977295, + "rewards_train/rejected": -0.47789010405540466, + "step": 2492 + }, + { + "epoch": 0.7, + "logps_train/chosen": -145.891845703125, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -174.0, + "logps_train/rejected": -272.15802001953125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7891845703125, + "rewards_train/margins": 8.026618003845215, + "rewards_train/rejected": -9.815802574157715, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -199.37522888183594, + "logps_train/ref_chosen": -145.0, + "logps_train/ref_rejected": -107.5, + "logps_train/rejected": -221.78448486328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.437522888183594, + "rewards_train/margins": 5.9909257888793945, + "rewards_train/rejected": -11.428448677062988, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -84.11388397216797, + "logps_train/ref_chosen": -55.0, + "logps_train/ref_rejected": -88.0, + "logps_train/rejected": -133.28179931640625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.911388397216797, + "rewards_train/margins": 1.6167917251586914, + "rewards_train/rejected": -4.528180122375488, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -131.88072204589844, + "logps_train/ref_chosen": -88.5, + "logps_train/ref_rejected": -113.5, + "logps_train/rejected": -163.10116577148438, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.338072299957275, + "rewards_train/margins": 0.622044563293457, + "rewards_train/rejected": -4.960116863250732, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -102.15830993652344, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -81.5, + "logps_train/rejected": -102.06129455566406, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.065830945968628, + "rewards_train/margins": -0.00970149040222168, + "rewards_train/rejected": -2.0561294555664062, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -12.280250549316406, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -19.125, + "logps_train/rejected": -32.039424896240234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.5499000549316406, + "rewards_train/margins": 0.7415424585342407, + "rewards_train/rejected": -1.2914425134658813, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -65.65874481201172, + "logps_train/ref_chosen": -56.75, + "logps_train/ref_rejected": -79.5, + "logps_train/rejected": -105.39435577392578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.8908745050430298, + "rewards_train/margins": 1.6985610723495483, + "rewards_train/rejected": -2.589435577392578, + "step": 2493 + }, + { + "epoch": 0.7, + "logps_train/chosen": -128.39096069335938, + "logps_train/ref_chosen": -108.0, + "logps_train/ref_rejected": -106.5, + "logps_train/rejected": -155.66827392578125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.0390961170196533, + "rewards_train/margins": 2.8777315616607666, + "rewards_train/rejected": -4.91682767868042, + "step": 2493 + }, + { + "epoch": 0.7, + "learning_rate": 3.1495081960764005e-11, + "loss": 0.2686, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -154.54904174804688, + "logps_train/ref_chosen": -119.0, + "logps_train/ref_rejected": -198.0, + "logps_train/rejected": -324.71575927734375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.5549042224884033, + "rewards_train/margins": 9.116672277450562, + "rewards_train/rejected": -12.671576499938965, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -33.270198822021484, + "logps_train/ref_chosen": -11.625, + "logps_train/ref_rejected": -7.4375, + "logps_train/rejected": -22.04501724243164, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.164520025253296, + "rewards_train/margins": -0.703768253326416, + "rewards_train/rejected": -1.4607517719268799, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -229.03599548339844, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -108.5, + "logps_train/rejected": -213.4518280029297, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -10.903599739074707, + "rewards_train/margins": -0.408416748046875, + "rewards_train/rejected": -10.495182991027832, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -51.535621643066406, + "logps_train/ref_chosen": -45.25, + "logps_train/ref_rejected": -6.09375, + "logps_train/rejected": -34.7265625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.6285621523857117, + "rewards_train/margins": 2.2347190976142883, + "rewards_train/rejected": -2.86328125, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -25.702402114868164, + "logps_train/ref_chosen": -16.25, + "logps_train/ref_rejected": -93.5, + "logps_train/rejected": -113.4580307006836, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9452401995658875, + "rewards_train/margins": 1.0505629181861877, + "rewards_train/rejected": -1.9958031177520752, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -40.793338775634766, + "logps_train/ref_chosen": -34.5, + "logps_train/ref_rejected": -1.1875, + "logps_train/rejected": -3.419353723526001, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.6293339133262634, + "rewards_train/margins": -0.4061485379934311, + "rewards_train/rejected": -0.22318537533283234, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -135.28036499023438, + "logps_train/ref_chosen": -128.0, + "logps_train/ref_rejected": -71.0, + "logps_train/rejected": -92.70535278320312, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.7280365228652954, + "rewards_train/margins": 1.442498803138733, + "rewards_train/rejected": -2.1705353260040283, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -161.08798217773438, + "logps_train/ref_chosen": -118.0, + "logps_train/ref_rejected": -118.5, + "logps_train/rejected": -187.98643493652344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -4.308798313140869, + "rewards_train/margins": 2.639845371246338, + "rewards_train/rejected": -6.948643684387207, + "step": 2494 + }, + { + "epoch": 0.7, + "logps_train/chosen": -288.37420654296875, + "logps_train/ref_chosen": -172.0, + "logps_train/ref_rejected": -144.0, + "logps_train/rejected": -273.0573425292969, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -11.637420654296875, + "rewards_train/margins": 1.2683134078979492, + "rewards_train/rejected": -12.905734062194824, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -113.1405029296875, + "logps_train/ref_chosen": -81.5, + "logps_train/ref_rejected": -134.0, + "logps_train/rejected": -197.42457580566406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.164050340652466, + "rewards_train/margins": 3.1784074306488037, + "rewards_train/rejected": -6.3424577713012695, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -143.43612670898438, + "logps_train/ref_chosen": -124.5, + "logps_train/ref_rejected": -124.5, + "logps_train/rejected": -143.55752563476562, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8936127424240112, + "rewards_train/margins": 0.01213979721069336, + "rewards_train/rejected": -1.9057525396347046, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -22.37287712097168, + "logps_train/ref_chosen": -1.9609375, + "logps_train/ref_rejected": -2.609375, + "logps_train/rejected": -20.53380584716797, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.041193962097168, + "rewards_train/margins": -0.24875080585479736, + "rewards_train/rejected": -1.7924431562423706, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -170.68528747558594, + "logps_train/ref_chosen": -117.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -192.59689331054688, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.368528842926025, + "rewards_train/margins": 1.091160774230957, + "rewards_train/rejected": -6.459689617156982, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -4.881369590759277, + "logps_train/ref_chosen": -3.03125, + "logps_train/ref_rejected": -6.9375, + "logps_train/rejected": -5.403672695159912, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -0.18501196801662445, + "rewards_train/margins": -0.3383947014808655, + "rewards_train/rejected": 0.15338273346424103, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -26.752368927001953, + "logps_train/ref_chosen": -3.59375, + "logps_train/ref_rejected": -4.3125, + "logps_train/rejected": -24.57590103149414, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.315861940383911, + "rewards_train/margins": -0.2895216941833496, + "rewards_train/rejected": -2.0263402462005615, + "step": 2495 + }, + { + "epoch": 0.7, + "logps_train/chosen": -300.2418212890625, + "logps_train/ref_chosen": -199.0, + "logps_train/ref_rejected": -201.0, + "logps_train/rejected": -309.341796875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -10.12418270111084, + "rewards_train/margins": 0.7099971771240234, + "rewards_train/rejected": -10.834179878234863, + "step": 2495 + }, + { + "epoch": 0.7, + "learning_rate": 1.3997855025649208e-11, + "loss": 0.4897, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -78.58535766601562, + "logps_train/ref_chosen": -67.0, + "logps_train/ref_rejected": -103.0, + "logps_train/rejected": -119.93046569824219, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1585358381271362, + "rewards_train/margins": 0.5345107316970825, + "rewards_train/rejected": -1.6930465698242188, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -179.7247772216797, + "logps_train/ref_chosen": -141.0, + "logps_train/ref_rejected": -122.0, + "logps_train/rejected": -197.52444458007812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.8724777698516846, + "rewards_train/margins": 3.679966688156128, + "rewards_train/rejected": -7.5524444580078125, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -58.2935791015625, + "logps_train/ref_chosen": -6.78125, + "logps_train/ref_rejected": -9.75, + "logps_train/rejected": -68.25128936767578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -5.151233196258545, + "rewards_train/margins": 0.6988959312438965, + "rewards_train/rejected": -5.850129127502441, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -15.760398864746094, + "logps_train/ref_chosen": -14.875, + "logps_train/ref_rejected": -20.25, + "logps_train/rejected": -26.848712921142578, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08853989094495773, + "rewards_train/margins": 0.5713313892483711, + "rewards_train/rejected": -0.6598712801933289, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -19.399686813354492, + "logps_train/ref_chosen": -7.78125, + "logps_train/ref_rejected": -8.4375, + "logps_train/rejected": -35.20452117919922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.1618436574935913, + "rewards_train/margins": 1.514858603477478, + "rewards_train/rejected": -2.6767022609710693, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -21.32331085205078, + "logps_train/ref_chosen": -2.375, + "logps_train/ref_rejected": -25.25, + "logps_train/rejected": -48.410213470458984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8948310613632202, + "rewards_train/margins": 0.42119038105010986, + "rewards_train/rejected": -2.31602144241333, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -118.13690185546875, + "logps_train/ref_chosen": -108.5, + "logps_train/ref_rejected": -101.0, + "logps_train/rejected": -128.31195068359375, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.9636902213096619, + "rewards_train/margins": 1.7675049901008606, + "rewards_train/rejected": -2.7311952114105225, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -1.2083137035369873, + "logps_train/ref_chosen": -2.203125, + "logps_train/ref_rejected": -3.96875, + "logps_train/rejected": -5.773928165435791, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09948112815618515, + "rewards_train/margins": 0.27999895066022873, + "rewards_train/rejected": -0.18051782250404358, + "step": 2496 + }, + { + "epoch": 0.7, + "logps_train/chosen": -305.14642333984375, + "logps_train/ref_chosen": -206.0, + "logps_train/ref_rejected": -190.0, + "logps_train/rejected": -276.15155029296875, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -9.914642333984375, + "rewards_train/margins": -1.2994871139526367, + "rewards_train/rejected": -8.615155220031738, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -13.78567886352539, + "logps_train/ref_chosen": -4.3125, + "logps_train/ref_rejected": -3.21875, + "logps_train/rejected": -16.959720611572266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.947317898273468, + "rewards_train/margins": 0.42677921056747437, + "rewards_train/rejected": -1.3740971088409424, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -50.2034797668457, + "logps_train/ref_chosen": -51.0, + "logps_train/ref_rejected": -26.625, + "logps_train/rejected": -55.094783782958984, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.07965202629566193, + "rewards_train/margins": 2.926630452275276, + "rewards_train/rejected": -2.8469784259796143, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -174.4943084716797, + "logps_train/ref_chosen": -135.0, + "logps_train/ref_rejected": -136.0, + "logps_train/rejected": -195.15167236328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.9494309425354004, + "rewards_train/margins": 1.9657363891601562, + "rewards_train/rejected": -5.915167331695557, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -60.71723937988281, + "logps_train/ref_chosen": -29.375, + "logps_train/ref_rejected": -5.78125, + "logps_train/rejected": -40.94559860229492, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.1342239379882812, + "rewards_train/margins": 0.38221096992492676, + "rewards_train/rejected": -3.516434907913208, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -35.320838928222656, + "logps_train/ref_chosen": -24.625, + "logps_train/ref_rejected": -74.0, + "logps_train/rejected": -91.0400390625, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.0695838928222656, + "rewards_train/margins": 0.6344200372695923, + "rewards_train/rejected": -1.704003930091858, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -129.15025329589844, + "logps_train/ref_chosen": -92.5, + "logps_train/ref_rejected": -104.5, + "logps_train/rejected": -162.93167114257812, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -3.665025472640991, + "rewards_train/margins": 2.1781418323516846, + "rewards_train/rejected": -5.843167304992676, + "step": 2497 + }, + { + "epoch": 0.7, + "logps_train/chosen": -6.688104152679443, + "logps_train/ref_chosen": -7.59375, + "logps_train/ref_rejected": -2.25, + "logps_train/rejected": -18.375782012939453, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.09056458622217178, + "rewards_train/margins": 1.7031428590416908, + "rewards_train/rejected": -1.612578272819519, + "step": 2497 + }, + { + "epoch": 0.7, + "learning_rate": 3.499469879542083e-12, + "loss": 0.3882, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -138.04129028320312, + "logps_train/ref_chosen": -120.0, + "logps_train/ref_rejected": -117.0, + "logps_train/rejected": -151.65621948242188, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.8041290044784546, + "rewards_train/margins": 1.661492943763733, + "rewards_train/rejected": -3.4656219482421875, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -85.61534881591797, + "logps_train/ref_chosen": -39.5, + "logps_train/ref_rejected": -31.25, + "logps_train/rejected": -65.70915985107422, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -4.61153507232666, + "rewards_train/margins": -1.165619134902954, + "rewards_train/rejected": -3.445915937423706, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -7.63490104675293, + "logps_train/ref_chosen": -6.8125, + "logps_train/ref_rejected": -2.828125, + "logps_train/rejected": -3.815744400024414, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.08224010467529297, + "rewards_train/margins": 0.016521833837032318, + "rewards_train/rejected": -0.09876193851232529, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -157.36614990234375, + "logps_train/ref_chosen": -151.0, + "logps_train/ref_rejected": -192.0, + "logps_train/rejected": -232.35137939453125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.636614978313446, + "rewards_train/margins": 3.3985231518745422, + "rewards_train/rejected": -4.035138130187988, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -31.793685913085938, + "logps_train/ref_chosen": -12.25, + "logps_train/ref_rejected": -3.90625, + "logps_train/rejected": -34.19734191894531, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.9543685913085938, + "rewards_train/margins": 1.0747406482696533, + "rewards_train/rejected": -3.029109239578247, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -56.50526428222656, + "logps_train/ref_chosen": -59.5, + "logps_train/ref_rejected": -86.5, + "logps_train/rejected": -88.32621002197266, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.2994735836982727, + "rewards_train/margins": 0.48209458589553833, + "rewards_train/rejected": -0.18262100219726562, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -10.527841567993164, + "logps_train/ref_chosen": -9.5625, + "logps_train/ref_rejected": -7.6875, + "logps_train/rejected": -27.512569427490234, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.09653415530920029, + "rewards_train/margins": 1.885972835123539, + "rewards_train/rejected": -1.9825069904327393, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -11.906068801879883, + "logps_train/ref_chosen": -7.25, + "logps_train/ref_rejected": -2.734375, + "logps_train/rejected": -12.168718338012695, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.4656068980693817, + "rewards_train/margins": 0.4778274595737457, + "rewards_train/rejected": -0.9434343576431274, + "step": 2498 + }, + { + "epoch": 0.7, + "logps_train/chosen": -95.51371002197266, + "logps_train/ref_chosen": -82.0, + "logps_train/ref_rejected": -61.0, + "logps_train/rejected": -130.74620056152344, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.3513710498809814, + "rewards_train/margins": 5.623249292373657, + "rewards_train/rejected": -6.974620342254639, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -178.5200653076172, + "logps_train/ref_chosen": -155.0, + "logps_train/ref_rejected": -128.0, + "logps_train/rejected": -227.60870361328125, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -2.352006673812866, + "rewards_train/margins": 7.608864068984985, + "rewards_train/rejected": -9.960870742797852, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -121.62806701660156, + "logps_train/ref_chosen": -103.5, + "logps_train/ref_rejected": -44.25, + "logps_train/rejected": -58.848907470703125, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -1.8128067255020142, + "rewards_train/margins": -0.35291600227355957, + "rewards_train/rejected": -1.4598907232284546, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -128.48361206054688, + "logps_train/ref_chosen": -126.0, + "logps_train/ref_rejected": -138.0, + "logps_train/rejected": -150.6868133544922, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -0.24836121499538422, + "rewards_train/margins": 1.0203201919794083, + "rewards_train/rejected": -1.2686814069747925, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -7.9457807540893555, + "logps_train/ref_chosen": -9.5, + "logps_train/ref_rejected": -7.53125, + "logps_train/rejected": -18.468202590942383, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": 0.1554219275712967, + "rewards_train/margins": 1.2491172105073929, + "rewards_train/rejected": -1.0936952829360962, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -36.70111083984375, + "logps_train/ref_chosen": -6.96875, + "logps_train/ref_rejected": -3.125, + "logps_train/rejected": -28.717100143432617, + "rewards_train/accuracies": 0.0, + "rewards_train/chosen": -2.973236083984375, + "rewards_train/margins": -0.41402602195739746, + "rewards_train/rejected": -2.5592100620269775, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -36.24474334716797, + "logps_train/ref_chosen": -19.125, + "logps_train/ref_rejected": -13.125, + "logps_train/rejected": -56.574928283691406, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.7119743824005127, + "rewards_train/margins": 2.633018732070923, + "rewards_train/rejected": -4.3449931144714355, + "step": 2499 + }, + { + "epoch": 0.7, + "logps_train/chosen": -60.59475326538086, + "logps_train/ref_chosen": -50.25, + "logps_train/ref_rejected": -67.0, + "logps_train/rejected": -82.0682373046875, + "rewards_train/accuracies": 1.0, + "rewards_train/chosen": -1.034475326538086, + "rewards_train/margins": 0.4723484516143799, + "rewards_train/rejected": -1.5068237781524658, + "step": 2499 + }, + { + "epoch": 0.7, + "learning_rate": 0.0, + "loss": 0.4157, + "step": 2500 + }, + { + "epoch": 0.7, + "step": 2500, + "total_flos": 0.0, + "train_loss": 0.4618360550403595, + "train_runtime": 8976.2003, + "train_samples_per_second": 2.228, + "train_steps_per_second": 0.279 + } + ], + "logging_steps": 2, + "max_steps": 2500, + "num_train_epochs": 1, + "save_steps": 500, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +}