|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3873, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2886597938144328e-09, |
|
"logits/chosen": -4.2921271324157715, |
|
"logits/rejected": -3.812117338180542, |
|
"logps/chosen": -664.6867065429688, |
|
"logps/rejected": -226.7833709716797, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.2886597938144328e-08, |
|
"logits/chosen": -4.003667831420898, |
|
"logits/rejected": -4.013306140899658, |
|
"logps/chosen": -559.2938232421875, |
|
"logps/rejected": -452.70074462890625, |
|
"loss": 0.6948, |
|
"rewards/accuracies": 0.2638888955116272, |
|
"rewards/chosen": -0.007192640565335751, |
|
"rewards/margins": -0.006332792341709137, |
|
"rewards/rejected": -0.000859847932588309, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.5773195876288656e-08, |
|
"logits/chosen": -4.353642463684082, |
|
"logits/rejected": -4.292398929595947, |
|
"logps/chosen": -554.0906982421875, |
|
"logps/rejected": -500.97119140625, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.001169868279248476, |
|
"rewards/margins": 0.001462915213778615, |
|
"rewards/rejected": -0.0026327825617045164, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.865979381443299e-08, |
|
"logits/chosen": -4.102766513824463, |
|
"logits/rejected": -4.200378894805908, |
|
"logps/chosen": -617.0684204101562, |
|
"logps/rejected": -476.2395935058594, |
|
"loss": 0.695, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.001789045287296176, |
|
"rewards/margins": -0.003806379158049822, |
|
"rewards/rejected": 0.005595424212515354, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.154639175257731e-08, |
|
"logits/chosen": -4.30725622177124, |
|
"logits/rejected": -4.225460052490234, |
|
"logps/chosen": -497.7335510253906, |
|
"logps/rejected": -415.4452209472656, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0006419029086828232, |
|
"rewards/margins": 0.0031944490037858486, |
|
"rewards/rejected": -0.0025525467935949564, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.443298969072164e-08, |
|
"logits/chosen": -3.88063383102417, |
|
"logits/rejected": -3.8105220794677734, |
|
"logps/chosen": -627.067626953125, |
|
"logps/rejected": -403.6964111328125, |
|
"loss": 0.6976, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.00458294665440917, |
|
"rewards/margins": -0.016261283308267593, |
|
"rewards/rejected": 0.011678336188197136, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 7.731958762886598e-08, |
|
"logits/chosen": -4.216163635253906, |
|
"logits/rejected": -4.099843978881836, |
|
"logps/chosen": -470.12115478515625, |
|
"logps/rejected": -469.4156799316406, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0052711316384375095, |
|
"rewards/margins": -0.0011110258055850863, |
|
"rewards/rejected": 0.00638215895742178, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 9.02061855670103e-08, |
|
"logits/chosen": -4.200804233551025, |
|
"logits/rejected": -4.1986494064331055, |
|
"logps/chosen": -648.3743896484375, |
|
"logps/rejected": -488.0792541503906, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0002953239600174129, |
|
"rewards/margins": 0.00705097708851099, |
|
"rewards/rejected": -0.006755652371793985, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0309278350515462e-07, |
|
"logits/chosen": -4.013070106506348, |
|
"logits/rejected": -4.1909003257751465, |
|
"logps/chosen": -538.8270263671875, |
|
"logps/rejected": -391.4429931640625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.007235817611217499, |
|
"rewards/margins": 0.009682310745120049, |
|
"rewards/rejected": -0.002446494298055768, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1597938144329897e-07, |
|
"logits/chosen": -3.9400150775909424, |
|
"logits/rejected": -3.9281005859375, |
|
"logps/chosen": -588.8606567382812, |
|
"logps/rejected": -484.28839111328125, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.004685616586357355, |
|
"rewards/margins": 0.008750900626182556, |
|
"rewards/rejected": -0.004065284971147776, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2886597938144328e-07, |
|
"logits/chosen": -4.089522361755371, |
|
"logits/rejected": -4.070917129516602, |
|
"logps/chosen": -573.93310546875, |
|
"logps/rejected": -485.439697265625, |
|
"loss": 0.6906, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.010096365585923195, |
|
"rewards/margins": 0.0105238426476717, |
|
"rewards/rejected": -0.00042747752740979195, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -4.051466941833496, |
|
"eval_logits/rejected": -4.089292526245117, |
|
"eval_logps/chosen": -549.3683471679688, |
|
"eval_logps/rejected": -437.9984130859375, |
|
"eval_loss": 0.6931844353675842, |
|
"eval_rewards/accuracies": 0.4860000014305115, |
|
"eval_rewards/chosen": 0.0008278049062937498, |
|
"eval_rewards/margins": 0.00017659256991464645, |
|
"eval_rewards/rejected": 0.0006512125837616622, |
|
"eval_runtime": 148.2369, |
|
"eval_samples_per_second": 13.492, |
|
"eval_steps_per_second": 1.686, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.417525773195876e-07, |
|
"logits/chosen": -4.135636329650879, |
|
"logits/rejected": -4.231348991394043, |
|
"logps/chosen": -458.62255859375, |
|
"logps/rejected": -379.28094482421875, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003453848185017705, |
|
"rewards/margins": 0.0036365636624395847, |
|
"rewards/rejected": -0.00018271691806148738, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.5463917525773197e-07, |
|
"logits/chosen": -4.357504844665527, |
|
"logits/rejected": -4.165073871612549, |
|
"logps/chosen": -392.82891845703125, |
|
"logps/rejected": -405.0232849121094, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0029750962276011705, |
|
"rewards/margins": -0.0063691637478768826, |
|
"rewards/rejected": 0.009344260208308697, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.6752577319587627e-07, |
|
"logits/chosen": -4.224671840667725, |
|
"logits/rejected": -4.147946357727051, |
|
"logps/chosen": -530.8834228515625, |
|
"logps/rejected": -379.1323547363281, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.001731688855215907, |
|
"rewards/margins": -0.0059483470395207405, |
|
"rewards/rejected": 0.004216659348458052, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.804123711340206e-07, |
|
"logits/chosen": -4.243564128875732, |
|
"logits/rejected": -4.247513771057129, |
|
"logps/chosen": -555.7782592773438, |
|
"logps/rejected": -475.36474609375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0019786651246249676, |
|
"rewards/margins": -0.0012128886301070452, |
|
"rewards/rejected": 0.0031915525905787945, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9329896907216494e-07, |
|
"logits/chosen": -4.274221897125244, |
|
"logits/rejected": -4.187704086303711, |
|
"logps/chosen": -537.5848388671875, |
|
"logps/rejected": -444.8301696777344, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0053375037387013435, |
|
"rewards/margins": 1.6005151337594725e-05, |
|
"rewards/rejected": 0.005321498028934002, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.0618556701030925e-07, |
|
"logits/chosen": -4.137946128845215, |
|
"logits/rejected": -4.2239580154418945, |
|
"logps/chosen": -473.9889221191406, |
|
"logps/rejected": -406.7872619628906, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.015802519395947456, |
|
"rewards/margins": 0.01578442193567753, |
|
"rewards/rejected": 1.8098298824043013e-05, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.190721649484536e-07, |
|
"logits/chosen": -4.182999134063721, |
|
"logits/rejected": -4.228874683380127, |
|
"logps/chosen": -527.0224609375, |
|
"logps/rejected": -448.3179626464844, |
|
"loss": 0.6961, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.019780535250902176, |
|
"rewards/margins": -0.006507801823318005, |
|
"rewards/rejected": 0.026288334280252457, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.3195876288659794e-07, |
|
"logits/chosen": -4.098742485046387, |
|
"logits/rejected": -4.176650524139404, |
|
"logps/chosen": -594.6082763671875, |
|
"logps/rejected": -453.4469299316406, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03402475267648697, |
|
"rewards/margins": 0.01976330205798149, |
|
"rewards/rejected": 0.014261451549828053, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.4484536082474224e-07, |
|
"logits/chosen": -4.383849143981934, |
|
"logits/rejected": -4.319648742675781, |
|
"logps/chosen": -584.6770629882812, |
|
"logps/rejected": -408.61370849609375, |
|
"loss": 0.6902, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.025487428531050682, |
|
"rewards/margins": 0.011225923895835876, |
|
"rewards/rejected": 0.014261503703892231, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5773195876288655e-07, |
|
"logits/chosen": -4.132022857666016, |
|
"logits/rejected": -4.150428295135498, |
|
"logps/chosen": -518.2391357421875, |
|
"logps/rejected": -388.0254821777344, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.02871655486524105, |
|
"rewards/margins": 0.022094249725341797, |
|
"rewards/rejected": 0.0066223046742379665, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -4.050084114074707, |
|
"eval_logits/rejected": -4.087072849273682, |
|
"eval_logps/chosen": -549.053955078125, |
|
"eval_logps/rejected": -437.8319396972656, |
|
"eval_loss": 0.6855266094207764, |
|
"eval_rewards/accuracies": 0.5640000104904175, |
|
"eval_rewards/chosen": 0.032268982380628586, |
|
"eval_rewards/margins": 0.014963901601731777, |
|
"eval_rewards/rejected": 0.017305083572864532, |
|
"eval_runtime": 146.4759, |
|
"eval_samples_per_second": 13.654, |
|
"eval_steps_per_second": 1.707, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.706185567010309e-07, |
|
"logits/chosen": -4.013279438018799, |
|
"logits/rejected": -4.023941516876221, |
|
"logps/chosen": -581.2147827148438, |
|
"logps/rejected": -522.2059936523438, |
|
"loss": 0.6897, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.030506515875458717, |
|
"rewards/margins": -0.003913003019988537, |
|
"rewards/rejected": 0.03441951796412468, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.835051546391752e-07, |
|
"logits/chosen": -4.057482719421387, |
|
"logits/rejected": -4.15061092376709, |
|
"logps/chosen": -469.19769287109375, |
|
"logps/rejected": -427.91595458984375, |
|
"loss": 0.6862, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.03468897193670273, |
|
"rewards/margins": 0.013076464645564556, |
|
"rewards/rejected": 0.021612513810396194, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.963917525773196e-07, |
|
"logits/chosen": -4.064208507537842, |
|
"logits/rejected": -4.0749077796936035, |
|
"logps/chosen": -530.9828491210938, |
|
"logps/rejected": -439.2674865722656, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.04473203793168068, |
|
"rewards/margins": 0.025556465610861778, |
|
"rewards/rejected": 0.01917557418346405, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.0927835051546394e-07, |
|
"logits/chosen": -4.100975036621094, |
|
"logits/rejected": -4.096819877624512, |
|
"logps/chosen": -526.16748046875, |
|
"logps/rejected": -439.20452880859375, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.059132933616638184, |
|
"rewards/margins": 0.019664695486426353, |
|
"rewards/rejected": 0.03946823999285698, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.2216494845360824e-07, |
|
"logits/chosen": -4.139791488647461, |
|
"logits/rejected": -4.0367560386657715, |
|
"logps/chosen": -521.2025146484375, |
|
"logps/rejected": -388.7520751953125, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06324665248394012, |
|
"rewards/margins": 0.04009511321783066, |
|
"rewards/rejected": 0.02315153181552887, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.3505154639175255e-07, |
|
"logits/chosen": -4.206658363342285, |
|
"logits/rejected": -4.1859846115112305, |
|
"logps/chosen": -668.1943969726562, |
|
"logps/rejected": -461.34259033203125, |
|
"loss": 0.6769, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.07276210933923721, |
|
"rewards/margins": 0.03734602779150009, |
|
"rewards/rejected": 0.03541607782244682, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.4793814432989685e-07, |
|
"logits/chosen": -3.937157392501831, |
|
"logits/rejected": -4.101494312286377, |
|
"logps/chosen": -664.857666015625, |
|
"logps/rejected": -487.4693908691406, |
|
"loss": 0.6737, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.10912873595952988, |
|
"rewards/margins": 0.05263194441795349, |
|
"rewards/rejected": 0.056496791541576385, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.608247422680412e-07, |
|
"logits/chosen": -4.2088494300842285, |
|
"logits/rejected": -4.2679290771484375, |
|
"logps/chosen": -711.7024536132812, |
|
"logps/rejected": -427.2392578125, |
|
"loss": 0.6648, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.12602953612804413, |
|
"rewards/margins": 0.07799698412418365, |
|
"rewards/rejected": 0.048032552003860474, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.737113402061856e-07, |
|
"logits/chosen": -4.126033782958984, |
|
"logits/rejected": -4.118724346160889, |
|
"logps/chosen": -527.9533081054688, |
|
"logps/rejected": -442.7091369628906, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.06928877532482147, |
|
"rewards/margins": 0.021227989345788956, |
|
"rewards/rejected": 0.04806078225374222, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.865979381443299e-07, |
|
"logits/chosen": -4.21649169921875, |
|
"logits/rejected": -4.306222438812256, |
|
"logps/chosen": -558.1029663085938, |
|
"logps/rejected": -426.37646484375, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.10165311396121979, |
|
"rewards/margins": 0.0537085235118866, |
|
"rewards/rejected": 0.047944579273462296, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -4.043172359466553, |
|
"eval_logits/rejected": -4.078800201416016, |
|
"eval_logps/chosen": -548.3015747070312, |
|
"eval_logps/rejected": -437.4681701660156, |
|
"eval_loss": 0.6674865484237671, |
|
"eval_rewards/accuracies": 0.6159999966621399, |
|
"eval_rewards/chosen": 0.10750828683376312, |
|
"eval_rewards/margins": 0.05382777377963066, |
|
"eval_rewards/rejected": 0.053680501878261566, |
|
"eval_runtime": 146.1324, |
|
"eval_samples_per_second": 13.686, |
|
"eval_steps_per_second": 1.711, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.9948453608247424e-07, |
|
"logits/chosen": -4.439688205718994, |
|
"logits/rejected": -4.406257629394531, |
|
"logps/chosen": -576.0067138671875, |
|
"logps/rejected": -442.0852966308594, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.11767254769802094, |
|
"rewards/margins": 0.05607324838638306, |
|
"rewards/rejected": 0.06159929558634758, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.123711340206185e-07, |
|
"logits/chosen": -4.040421962738037, |
|
"logits/rejected": -3.995241165161133, |
|
"logps/chosen": -634.211181640625, |
|
"logps/rejected": -444.74945068359375, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.14802002906799316, |
|
"rewards/margins": 0.08729343116283417, |
|
"rewards/rejected": 0.0607265941798687, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.2525773195876285e-07, |
|
"logits/chosen": -4.013192176818848, |
|
"logits/rejected": -3.9118850231170654, |
|
"logps/chosen": -531.2618408203125, |
|
"logps/rejected": -369.8399963378906, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.14214035868644714, |
|
"rewards/margins": 0.0905950665473938, |
|
"rewards/rejected": 0.051545269787311554, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.381443298969072e-07, |
|
"logits/chosen": -4.299261569976807, |
|
"logits/rejected": -4.219182968139648, |
|
"logps/chosen": -580.9090576171875, |
|
"logps/rejected": -442.6720275878906, |
|
"loss": 0.6589, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.16068990528583527, |
|
"rewards/margins": 0.07925193011760712, |
|
"rewards/rejected": 0.08143799006938934, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.510309278350515e-07, |
|
"logits/chosen": -4.036250114440918, |
|
"logits/rejected": -3.9510204792022705, |
|
"logps/chosen": -485.1849670410156, |
|
"logps/rejected": -423.96746826171875, |
|
"loss": 0.6691, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.16000322997570038, |
|
"rewards/margins": 0.0713193342089653, |
|
"rewards/rejected": 0.08868391811847687, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.639175257731959e-07, |
|
"logits/chosen": -3.97419810295105, |
|
"logits/rejected": -3.945896863937378, |
|
"logps/chosen": -588.8265380859375, |
|
"logps/rejected": -500.585205078125, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.16587187349796295, |
|
"rewards/margins": 0.048247091472148895, |
|
"rewards/rejected": 0.11762477457523346, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.7680412371134024e-07, |
|
"logits/chosen": -4.279057502746582, |
|
"logits/rejected": -4.3186540603637695, |
|
"logps/chosen": -577.9805908203125, |
|
"logps/rejected": -508.83880615234375, |
|
"loss": 0.6621, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.1850723773241043, |
|
"rewards/margins": 0.04405021667480469, |
|
"rewards/rejected": 0.14102217555046082, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.896907216494845e-07, |
|
"logits/chosen": -4.560557842254639, |
|
"logits/rejected": -4.472795009613037, |
|
"logps/chosen": -585.3865966796875, |
|
"logps/rejected": -427.63092041015625, |
|
"loss": 0.6453, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.22210833430290222, |
|
"rewards/margins": 0.14162525534629822, |
|
"rewards/rejected": 0.0804830864071846, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.997130559540889e-07, |
|
"logits/chosen": -4.149146556854248, |
|
"logits/rejected": -4.130012035369873, |
|
"logps/chosen": -458.86334228515625, |
|
"logps/rejected": -402.4290466308594, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.155195951461792, |
|
"rewards/margins": 0.0719287320971489, |
|
"rewards/rejected": 0.0832671970129013, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.982783357245337e-07, |
|
"logits/chosen": -4.101078987121582, |
|
"logits/rejected": -3.9474518299102783, |
|
"logps/chosen": -594.5633544921875, |
|
"logps/rejected": -459.3837890625, |
|
"loss": 0.6579, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.22732532024383545, |
|
"rewards/margins": 0.15504160523414612, |
|
"rewards/rejected": 0.07228370010852814, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_logits/chosen": -4.030913829803467, |
|
"eval_logits/rejected": -4.064504623413086, |
|
"eval_logps/chosen": -547.223388671875, |
|
"eval_logps/rejected": -437.06365966796875, |
|
"eval_loss": 0.6425994038581848, |
|
"eval_rewards/accuracies": 0.6430000066757202, |
|
"eval_rewards/chosen": 0.21532239019870758, |
|
"eval_rewards/margins": 0.12119224667549133, |
|
"eval_rewards/rejected": 0.09413015842437744, |
|
"eval_runtime": 146.406, |
|
"eval_samples_per_second": 13.661, |
|
"eval_steps_per_second": 1.708, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.968436154949784e-07, |
|
"logits/chosen": -4.3761677742004395, |
|
"logits/rejected": -4.4744062423706055, |
|
"logps/chosen": -486.56976318359375, |
|
"logps/rejected": -388.5422058105469, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2251121997833252, |
|
"rewards/margins": 0.1718236207962036, |
|
"rewards/rejected": 0.05328858643770218, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.954088952654232e-07, |
|
"logits/chosen": -3.916259288787842, |
|
"logits/rejected": -4.022424221038818, |
|
"logps/chosen": -609.468017578125, |
|
"logps/rejected": -489.47503662109375, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2788829207420349, |
|
"rewards/margins": 0.13126085698604584, |
|
"rewards/rejected": 0.14762204885482788, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.93974175035868e-07, |
|
"logits/chosen": -4.073642253875732, |
|
"logits/rejected": -3.992410182952881, |
|
"logps/chosen": -589.1423950195312, |
|
"logps/rejected": -387.6160583496094, |
|
"loss": 0.625, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.27620893716812134, |
|
"rewards/margins": 0.2046860158443451, |
|
"rewards/rejected": 0.07152291387319565, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.925394548063128e-07, |
|
"logits/chosen": -4.047796249389648, |
|
"logits/rejected": -4.099135875701904, |
|
"logps/chosen": -556.1654663085938, |
|
"logps/rejected": -459.1832580566406, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.26381629705429077, |
|
"rewards/margins": 0.18791969120502472, |
|
"rewards/rejected": 0.07589660584926605, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.911047345767575e-07, |
|
"logits/chosen": -3.9531607627868652, |
|
"logits/rejected": -3.9822494983673096, |
|
"logps/chosen": -603.289306640625, |
|
"logps/rejected": -452.654541015625, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.24003124237060547, |
|
"rewards/margins": 0.1340305507183075, |
|
"rewards/rejected": 0.10600068420171738, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.896700143472023e-07, |
|
"logits/chosen": -4.046868801116943, |
|
"logits/rejected": -3.973362684249878, |
|
"logps/chosen": -543.8755493164062, |
|
"logps/rejected": -415.2347106933594, |
|
"loss": 0.6243, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.27269724011421204, |
|
"rewards/margins": 0.13904382288455963, |
|
"rewards/rejected": 0.1336534321308136, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.88235294117647e-07, |
|
"logits/chosen": -3.9586892127990723, |
|
"logits/rejected": -3.949618101119995, |
|
"logps/chosen": -521.1124267578125, |
|
"logps/rejected": -476.64599609375, |
|
"loss": 0.6414, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.26220518350601196, |
|
"rewards/margins": 0.10140831768512726, |
|
"rewards/rejected": 0.1607969105243683, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.868005738880918e-07, |
|
"logits/chosen": -4.340029716491699, |
|
"logits/rejected": -4.296602249145508, |
|
"logps/chosen": -498.50628662109375, |
|
"logps/rejected": -436.04193115234375, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.23516115546226501, |
|
"rewards/margins": 0.14310702681541443, |
|
"rewards/rejected": 0.09205415844917297, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.853658536585365e-07, |
|
"logits/chosen": -3.9785568714141846, |
|
"logits/rejected": -3.9936375617980957, |
|
"logps/chosen": -535.5206298828125, |
|
"logps/rejected": -418.255126953125, |
|
"loss": 0.6359, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.26155534386634827, |
|
"rewards/margins": 0.19521105289459229, |
|
"rewards/rejected": 0.06634429097175598, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.839311334289813e-07, |
|
"logits/chosen": -4.171419620513916, |
|
"logits/rejected": -4.2884111404418945, |
|
"logps/chosen": -497.77874755859375, |
|
"logps/rejected": -401.29046630859375, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2628328502178192, |
|
"rewards/margins": 0.19560939073562622, |
|
"rewards/rejected": 0.06722346693277359, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -4.0221147537231445, |
|
"eval_logits/rejected": -4.052542686462402, |
|
"eval_logps/chosen": -546.3970336914062, |
|
"eval_logps/rejected": -436.89892578125, |
|
"eval_loss": 0.6240983605384827, |
|
"eval_rewards/accuracies": 0.6430000066757202, |
|
"eval_rewards/chosen": 0.2979632318019867, |
|
"eval_rewards/margins": 0.18736404180526733, |
|
"eval_rewards/rejected": 0.11059919744729996, |
|
"eval_runtime": 146.1671, |
|
"eval_samples_per_second": 13.683, |
|
"eval_steps_per_second": 1.71, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.824964131994261e-07, |
|
"logits/chosen": -4.073412895202637, |
|
"logits/rejected": -4.001163959503174, |
|
"logps/chosen": -588.8052978515625, |
|
"logps/rejected": -525.47314453125, |
|
"loss": 0.6532, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2637428641319275, |
|
"rewards/margins": 0.08834028244018555, |
|
"rewards/rejected": 0.17540256679058075, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.810616929698708e-07, |
|
"logits/chosen": -4.054637908935547, |
|
"logits/rejected": -4.115445613861084, |
|
"logps/chosen": -586.9202270507812, |
|
"logps/rejected": -401.8949890136719, |
|
"loss": 0.6252, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2919687032699585, |
|
"rewards/margins": 0.22300024330615997, |
|
"rewards/rejected": 0.06896845996379852, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.796269727403156e-07, |
|
"logits/chosen": -4.366249084472656, |
|
"logits/rejected": -4.296690940856934, |
|
"logps/chosen": -501.8008728027344, |
|
"logps/rejected": -414.6390686035156, |
|
"loss": 0.6275, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2946510314941406, |
|
"rewards/margins": 0.20942220091819763, |
|
"rewards/rejected": 0.0852288231253624, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.781922525107604e-07, |
|
"logits/chosen": -4.214944362640381, |
|
"logits/rejected": -4.242516040802002, |
|
"logps/chosen": -582.1668701171875, |
|
"logps/rejected": -438.54376220703125, |
|
"loss": 0.6205, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3577159643173218, |
|
"rewards/margins": 0.19794291257858276, |
|
"rewards/rejected": 0.159773051738739, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7675753228120513e-07, |
|
"logits/chosen": -4.113412380218506, |
|
"logits/rejected": -3.993567705154419, |
|
"logps/chosen": -564.5824584960938, |
|
"logps/rejected": -398.8680419921875, |
|
"loss": 0.6193, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.316133588552475, |
|
"rewards/margins": 0.27710846066474915, |
|
"rewards/rejected": 0.039025187492370605, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.7532281205164993e-07, |
|
"logits/chosen": -4.085113048553467, |
|
"logits/rejected": -4.045032024383545, |
|
"logps/chosen": -643.7376708984375, |
|
"logps/rejected": -498.99859619140625, |
|
"loss": 0.6274, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.34889236092567444, |
|
"rewards/margins": 0.22896642982959747, |
|
"rewards/rejected": 0.11992595344781876, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.738880918220947e-07, |
|
"logits/chosen": -4.168662071228027, |
|
"logits/rejected": -4.141668319702148, |
|
"logps/chosen": -560.7593994140625, |
|
"logps/rejected": -406.78143310546875, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.331714928150177, |
|
"rewards/margins": 0.245022252202034, |
|
"rewards/rejected": 0.0866926982998848, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.7245337159253943e-07, |
|
"logits/chosen": -4.329155445098877, |
|
"logits/rejected": -4.298244476318359, |
|
"logps/chosen": -563.4876708984375, |
|
"logps/rejected": -376.99725341796875, |
|
"loss": 0.6147, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.2851625382900238, |
|
"rewards/margins": 0.19439759850502014, |
|
"rewards/rejected": 0.09076493978500366, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.710186513629842e-07, |
|
"logits/chosen": -4.025614261627197, |
|
"logits/rejected": -3.995368242263794, |
|
"logps/chosen": -570.0155029296875, |
|
"logps/rejected": -456.23223876953125, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.38556593656539917, |
|
"rewards/margins": 0.24411602318286896, |
|
"rewards/rejected": 0.1414499133825302, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.69583931133429e-07, |
|
"logits/chosen": -4.11724853515625, |
|
"logits/rejected": -4.225184440612793, |
|
"logps/chosen": -600.27685546875, |
|
"logps/rejected": -416.496826171875, |
|
"loss": 0.6229, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3451527953147888, |
|
"rewards/margins": 0.26950401067733765, |
|
"rewards/rejected": 0.07564878463745117, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -4.0116496086120605, |
|
"eval_logits/rejected": -4.040153503417969, |
|
"eval_logps/chosen": -545.94873046875, |
|
"eval_logps/rejected": -436.90228271484375, |
|
"eval_loss": 0.6138368844985962, |
|
"eval_rewards/accuracies": 0.6579999923706055, |
|
"eval_rewards/chosen": 0.3427916169166565, |
|
"eval_rewards/margins": 0.2325276881456375, |
|
"eval_rewards/rejected": 0.11026395857334137, |
|
"eval_runtime": 145.937, |
|
"eval_samples_per_second": 13.705, |
|
"eval_steps_per_second": 1.713, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.681492109038737e-07, |
|
"logits/chosen": -4.138489723205566, |
|
"logits/rejected": -4.042520046234131, |
|
"logps/chosen": -544.0598754882812, |
|
"logps/rejected": -387.63031005859375, |
|
"loss": 0.5897, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.3607966899871826, |
|
"rewards/margins": 0.3171598017215729, |
|
"rewards/rejected": 0.043636929243803024, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.667144906743185e-07, |
|
"logits/chosen": -4.025771617889404, |
|
"logits/rejected": -3.9127840995788574, |
|
"logps/chosen": -517.0219116210938, |
|
"logps/rejected": -439.63800048828125, |
|
"loss": 0.5769, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.3815905749797821, |
|
"rewards/margins": 0.3481997549533844, |
|
"rewards/rejected": 0.033390797674655914, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.6527977044476324e-07, |
|
"logits/chosen": -4.107082843780518, |
|
"logits/rejected": -4.197465419769287, |
|
"logps/chosen": -576.8883056640625, |
|
"logps/rejected": -426.826904296875, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4150086045265198, |
|
"rewards/margins": 0.3171616792678833, |
|
"rewards/rejected": 0.09784691035747528, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6384505021520805e-07, |
|
"logits/chosen": -4.232905864715576, |
|
"logits/rejected": -4.251595497131348, |
|
"logps/chosen": -526.0496215820312, |
|
"logps/rejected": -378.66778564453125, |
|
"loss": 0.6053, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.36744990944862366, |
|
"rewards/margins": 0.237229585647583, |
|
"rewards/rejected": 0.13022030889987946, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6241032998565275e-07, |
|
"logits/chosen": -3.990309953689575, |
|
"logits/rejected": -3.9665799140930176, |
|
"logps/chosen": -535.3065795898438, |
|
"logps/rejected": -371.23883056640625, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.35981225967407227, |
|
"rewards/margins": 0.3149539828300476, |
|
"rewards/rejected": 0.044858284294605255, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.6097560975609755e-07, |
|
"logits/chosen": -4.290364742279053, |
|
"logits/rejected": -4.3908371925354, |
|
"logps/chosen": -602.7872314453125, |
|
"logps/rejected": -467.87841796875, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4449954628944397, |
|
"rewards/margins": 0.33234038949012756, |
|
"rewards/rejected": 0.11265511810779572, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.595408895265423e-07, |
|
"logits/chosen": -4.181097507476807, |
|
"logits/rejected": -4.184117317199707, |
|
"logps/chosen": -562.30908203125, |
|
"logps/rejected": -419.0519104003906, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.41988906264305115, |
|
"rewards/margins": 0.33613476157188416, |
|
"rewards/rejected": 0.08375430852174759, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.581061692969871e-07, |
|
"logits/chosen": -3.9935073852539062, |
|
"logits/rejected": -4.078420162200928, |
|
"logps/chosen": -594.1588134765625, |
|
"logps/rejected": -442.93218994140625, |
|
"loss": 0.5912, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3990897536277771, |
|
"rewards/margins": 0.3166866898536682, |
|
"rewards/rejected": 0.08240304887294769, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.566714490674318e-07, |
|
"logits/chosen": -3.8547301292419434, |
|
"logits/rejected": -3.8780627250671387, |
|
"logps/chosen": -467.4917907714844, |
|
"logps/rejected": -409.6250915527344, |
|
"loss": 0.5982, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3181690573692322, |
|
"rewards/margins": 0.30037710070610046, |
|
"rewards/rejected": 0.01779193803668022, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.552367288378766e-07, |
|
"logits/chosen": -3.856755018234253, |
|
"logits/rejected": -3.7566399574279785, |
|
"logps/chosen": -496.44580078125, |
|
"logps/rejected": -416.92791748046875, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3064565062522888, |
|
"rewards/margins": 0.19347265362739563, |
|
"rewards/rejected": 0.1129838228225708, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_logits/chosen": -4.004153728485107, |
|
"eval_logits/rejected": -4.03006649017334, |
|
"eval_logps/chosen": -545.5549926757812, |
|
"eval_logps/rejected": -437.035400390625, |
|
"eval_loss": 0.6053361892700195, |
|
"eval_rewards/accuracies": 0.656000018119812, |
|
"eval_rewards/chosen": 0.3821641206741333, |
|
"eval_rewards/margins": 0.28520864248275757, |
|
"eval_rewards/rejected": 0.09695547074079514, |
|
"eval_runtime": 146.9276, |
|
"eval_samples_per_second": 13.612, |
|
"eval_steps_per_second": 1.702, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.5380200860832136e-07, |
|
"logits/chosen": -4.1166276931762695, |
|
"logits/rejected": -4.0413994789123535, |
|
"logps/chosen": -559.1090087890625, |
|
"logps/rejected": -445.1997985839844, |
|
"loss": 0.6028, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.37777018547058105, |
|
"rewards/margins": 0.23465311527252197, |
|
"rewards/rejected": 0.1431170552968979, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5236728837876616e-07, |
|
"logits/chosen": -4.048049449920654, |
|
"logits/rejected": -3.983046293258667, |
|
"logps/chosen": -521.6533813476562, |
|
"logps/rejected": -423.5769958496094, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.3277244567871094, |
|
"rewards/margins": 0.251709520816803, |
|
"rewards/rejected": 0.07601495087146759, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.5093256814921086e-07, |
|
"logits/chosen": -3.96891713142395, |
|
"logits/rejected": -4.157193660736084, |
|
"logps/chosen": -527.0986328125, |
|
"logps/rejected": -350.09735107421875, |
|
"loss": 0.6191, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.3868439793586731, |
|
"rewards/margins": 0.27587562799453735, |
|
"rewards/rejected": 0.11096830666065216, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.4949784791965567e-07, |
|
"logits/chosen": -4.01112174987793, |
|
"logits/rejected": -3.9385008811950684, |
|
"logps/chosen": -575.333740234375, |
|
"logps/rejected": -411.80865478515625, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4110191762447357, |
|
"rewards/margins": 0.3183595538139343, |
|
"rewards/rejected": 0.0926596075296402, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.480631276901004e-07, |
|
"logits/chosen": -3.8952746391296387, |
|
"logits/rejected": -3.9051570892333984, |
|
"logps/chosen": -587.7459716796875, |
|
"logps/rejected": -426.0521545410156, |
|
"loss": 0.6019, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4798852503299713, |
|
"rewards/margins": 0.3581300377845764, |
|
"rewards/rejected": 0.12175522744655609, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.466284074605452e-07, |
|
"logits/chosen": -4.128601551055908, |
|
"logits/rejected": -4.192216396331787, |
|
"logps/chosen": -555.259033203125, |
|
"logps/rejected": -431.3056640625, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4251033365726471, |
|
"rewards/margins": 0.35620301961898804, |
|
"rewards/rejected": 0.06890030205249786, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.451936872309899e-07, |
|
"logits/chosen": -4.191853046417236, |
|
"logits/rejected": -4.073651313781738, |
|
"logps/chosen": -564.2633056640625, |
|
"logps/rejected": -462.38232421875, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.46583813428878784, |
|
"rewards/margins": 0.4038007855415344, |
|
"rewards/rejected": 0.0620373897254467, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.437589670014347e-07, |
|
"logits/chosen": -3.9436306953430176, |
|
"logits/rejected": -4.079471111297607, |
|
"logps/chosen": -569.0813598632812, |
|
"logps/rejected": -438.1226501464844, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.32991427183151245, |
|
"rewards/margins": 0.34178251028060913, |
|
"rewards/rejected": -0.011868256144225597, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.423242467718795e-07, |
|
"logits/chosen": -4.243984699249268, |
|
"logits/rejected": -4.39116907119751, |
|
"logps/chosen": -674.5192260742188, |
|
"logps/rejected": -492.4161682128906, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5247339010238647, |
|
"rewards/margins": 0.3629537522792816, |
|
"rewards/rejected": 0.1617802083492279, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.4088952654232423e-07, |
|
"logits/chosen": -3.945283889770508, |
|
"logits/rejected": -3.931304454803467, |
|
"logps/chosen": -520.6378173828125, |
|
"logps/rejected": -340.75103759765625, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.41377678513526917, |
|
"rewards/margins": 0.4200451374053955, |
|
"rewards/rejected": -0.006268366239964962, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -4.009909629821777, |
|
"eval_logits/rejected": -4.035899639129639, |
|
"eval_logps/chosen": -545.2993774414062, |
|
"eval_logps/rejected": -437.1260070800781, |
|
"eval_loss": 0.5998407006263733, |
|
"eval_rewards/accuracies": 0.6539999842643738, |
|
"eval_rewards/chosen": 0.4077303409576416, |
|
"eval_rewards/margins": 0.3198363780975342, |
|
"eval_rewards/rejected": 0.0878940224647522, |
|
"eval_runtime": 145.3508, |
|
"eval_samples_per_second": 13.76, |
|
"eval_steps_per_second": 1.72, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.39454806312769e-07, |
|
"logits/chosen": -3.9220452308654785, |
|
"logits/rejected": -4.041108131408691, |
|
"logps/chosen": -615.2744750976562, |
|
"logps/rejected": -500.8890686035156, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.49199342727661133, |
|
"rewards/margins": 0.3499099612236023, |
|
"rewards/rejected": 0.1420835256576538, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.380200860832138e-07, |
|
"logits/chosen": -3.90093994140625, |
|
"logits/rejected": -3.9337615966796875, |
|
"logps/chosen": -616.523681640625, |
|
"logps/rejected": -451.52996826171875, |
|
"loss": 0.5575, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5319877862930298, |
|
"rewards/margins": 0.5240375399589539, |
|
"rewards/rejected": 0.007950320839881897, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.3658536585365853e-07, |
|
"logits/chosen": -4.105984687805176, |
|
"logits/rejected": -4.126413345336914, |
|
"logps/chosen": -491.058349609375, |
|
"logps/rejected": -472.4222106933594, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3649570047855377, |
|
"rewards/margins": 0.19882622361183167, |
|
"rewards/rejected": 0.16613081097602844, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.351506456241033e-07, |
|
"logits/chosen": -4.366209983825684, |
|
"logits/rejected": -4.29564905166626, |
|
"logps/chosen": -573.9385375976562, |
|
"logps/rejected": -327.5928649902344, |
|
"loss": 0.5732, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.47058337926864624, |
|
"rewards/margins": 0.47838321328163147, |
|
"rewards/rejected": -0.007799782790243626, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.3371592539454804e-07, |
|
"logits/chosen": -3.7974257469177246, |
|
"logits/rejected": -3.734402894973755, |
|
"logps/chosen": -471.0333557128906, |
|
"logps/rejected": -374.65673828125, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.34169143438339233, |
|
"rewards/margins": 0.2484813630580902, |
|
"rewards/rejected": 0.09321005642414093, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.322812051649928e-07, |
|
"logits/chosen": -4.0287275314331055, |
|
"logits/rejected": -4.05717134475708, |
|
"logps/chosen": -469.2396545410156, |
|
"logps/rejected": -434.5414123535156, |
|
"loss": 0.5803, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4505770206451416, |
|
"rewards/margins": 0.4199690818786621, |
|
"rewards/rejected": 0.03060789778828621, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.308464849354376e-07, |
|
"logits/chosen": -3.7610325813293457, |
|
"logits/rejected": -3.8557701110839844, |
|
"logps/chosen": -529.0855712890625, |
|
"logps/rejected": -426.6482849121094, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.43691587448120117, |
|
"rewards/margins": 0.37098073959350586, |
|
"rewards/rejected": 0.0659351572394371, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.294117647058823e-07, |
|
"logits/chosen": -4.040841102600098, |
|
"logits/rejected": -4.051581382751465, |
|
"logps/chosen": -590.5636596679688, |
|
"logps/rejected": -456.1898498535156, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.3900124430656433, |
|
"rewards/margins": 0.41093358397483826, |
|
"rewards/rejected": -0.020921092480421066, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.279770444763271e-07, |
|
"logits/chosen": -4.3830671310424805, |
|
"logits/rejected": -4.194474220275879, |
|
"logps/chosen": -587.708251953125, |
|
"logps/rejected": -454.063720703125, |
|
"loss": 0.6117, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4363631308078766, |
|
"rewards/margins": 0.3432873785495758, |
|
"rewards/rejected": 0.09307573735713959, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2654232424677185e-07, |
|
"logits/chosen": -4.166562080383301, |
|
"logits/rejected": -4.1289520263671875, |
|
"logps/chosen": -507.2445373535156, |
|
"logps/rejected": -396.4598083496094, |
|
"loss": 0.6485, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.27186277508735657, |
|
"rewards/margins": 0.16729417443275452, |
|
"rewards/rejected": 0.10456860065460205, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_logits/chosen": -3.9936437606811523, |
|
"eval_logits/rejected": -4.016723155975342, |
|
"eval_logps/chosen": -545.1683349609375, |
|
"eval_logps/rejected": -437.3501281738281, |
|
"eval_loss": 0.5922096371650696, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": 0.4208315312862396, |
|
"eval_rewards/margins": 0.3553457260131836, |
|
"eval_rewards/rejected": 0.06548583507537842, |
|
"eval_runtime": 146.2261, |
|
"eval_samples_per_second": 13.677, |
|
"eval_steps_per_second": 1.71, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.2510760401721665e-07, |
|
"logits/chosen": -4.098907947540283, |
|
"logits/rejected": -4.098723411560059, |
|
"logps/chosen": -650.6366577148438, |
|
"logps/rejected": -495.94879150390625, |
|
"loss": 0.5866, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5507170557975769, |
|
"rewards/margins": 0.5398961305618286, |
|
"rewards/rejected": 0.010820944793522358, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2367288378766135e-07, |
|
"logits/chosen": -4.1348772048950195, |
|
"logits/rejected": -4.166952610015869, |
|
"logps/chosen": -591.7069702148438, |
|
"logps/rejected": -477.39776611328125, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.47026628255844116, |
|
"rewards/margins": 0.3723362982273102, |
|
"rewards/rejected": 0.09792999923229218, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.2223816355810615e-07, |
|
"logits/chosen": -4.0753397941589355, |
|
"logits/rejected": -4.123549461364746, |
|
"logps/chosen": -559.75634765625, |
|
"logps/rejected": -458.8775329589844, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.39656537771224976, |
|
"rewards/margins": 0.3553692698478699, |
|
"rewards/rejected": 0.04119610786437988, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.208034433285509e-07, |
|
"logits/chosen": -4.269906520843506, |
|
"logits/rejected": -4.303974628448486, |
|
"logps/chosen": -593.5145874023438, |
|
"logps/rejected": -494.8085021972656, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5839260816574097, |
|
"rewards/margins": 0.44306641817092896, |
|
"rewards/rejected": 0.14085964858531952, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1936872309899565e-07, |
|
"logits/chosen": -3.792731523513794, |
|
"logits/rejected": -3.8398139476776123, |
|
"logps/chosen": -489.6437072753906, |
|
"logps/rejected": -401.5340576171875, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.2587449550628662, |
|
"rewards/margins": 0.2045062780380249, |
|
"rewards/rejected": 0.054238706827163696, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.179340028694404e-07, |
|
"logits/chosen": -4.206066131591797, |
|
"logits/rejected": -4.116007328033447, |
|
"logps/chosen": -482.42816162109375, |
|
"logps/rejected": -382.22845458984375, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3317318558692932, |
|
"rewards/margins": 0.34027567505836487, |
|
"rewards/rejected": -0.008543826639652252, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.164992826398852e-07, |
|
"logits/chosen": -4.003951549530029, |
|
"logits/rejected": -3.9982573986053467, |
|
"logps/chosen": -494.906005859375, |
|
"logps/rejected": -401.5001220703125, |
|
"loss": 0.6299, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.266286164522171, |
|
"rewards/margins": 0.27161869406700134, |
|
"rewards/rejected": -0.005332520697265863, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.1506456241032996e-07, |
|
"logits/chosen": -4.05717134475708, |
|
"logits/rejected": -3.8585472106933594, |
|
"logps/chosen": -563.6212768554688, |
|
"logps/rejected": -387.9486999511719, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3436369299888611, |
|
"rewards/margins": 0.378772109746933, |
|
"rewards/rejected": -0.035135164856910706, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.136298421807747e-07, |
|
"logits/chosen": -4.064385890960693, |
|
"logits/rejected": -4.122750759124756, |
|
"logps/chosen": -587.16162109375, |
|
"logps/rejected": -431.41796875, |
|
"loss": 0.572, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4710933566093445, |
|
"rewards/margins": 0.4460281431674957, |
|
"rewards/rejected": 0.02506522461771965, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1219512195121946e-07, |
|
"logits/chosen": -4.021462917327881, |
|
"logits/rejected": -3.989718198776245, |
|
"logps/chosen": -584.3521728515625, |
|
"logps/rejected": -455.634033203125, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.48522061109542847, |
|
"rewards/margins": 0.3698544502258301, |
|
"rewards/rejected": 0.11536619812250137, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -3.986903429031372, |
|
"eval_logits/rejected": -4.009212017059326, |
|
"eval_logps/chosen": -545.3309326171875, |
|
"eval_logps/rejected": -437.7181701660156, |
|
"eval_loss": 0.5879542827606201, |
|
"eval_rewards/accuracies": 0.6620000004768372, |
|
"eval_rewards/chosen": 0.40457141399383545, |
|
"eval_rewards/margins": 0.37589016556739807, |
|
"eval_rewards/rejected": 0.028681199997663498, |
|
"eval_runtime": 146.6025, |
|
"eval_samples_per_second": 13.642, |
|
"eval_steps_per_second": 1.705, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.1076040172166427e-07, |
|
"logits/chosen": -4.139552116394043, |
|
"logits/rejected": -3.9534621238708496, |
|
"logps/chosen": -571.7590942382812, |
|
"logps/rejected": -444.6793518066406, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3784537613391876, |
|
"rewards/margins": 0.23798270523548126, |
|
"rewards/rejected": 0.14047105610370636, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.09325681492109e-07, |
|
"logits/chosen": -4.049252510070801, |
|
"logits/rejected": -4.108782768249512, |
|
"logps/chosen": -644.1297607421875, |
|
"logps/rejected": -546.4414672851562, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3904695510864258, |
|
"rewards/margins": 0.23047880828380585, |
|
"rewards/rejected": 0.15999077260494232, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.0789096126255377e-07, |
|
"logits/chosen": -4.110049247741699, |
|
"logits/rejected": -4.13530969619751, |
|
"logps/chosen": -601.107666015625, |
|
"logps/rejected": -430.6465759277344, |
|
"loss": 0.5972, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.39451712369918823, |
|
"rewards/margins": 0.3030509948730469, |
|
"rewards/rejected": 0.09146615862846375, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.064562410329985e-07, |
|
"logits/chosen": -4.069981575012207, |
|
"logits/rejected": -4.104067802429199, |
|
"logps/chosen": -562.4483642578125, |
|
"logps/rejected": -496.1336975097656, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.46653634309768677, |
|
"rewards/margins": 0.4154808521270752, |
|
"rewards/rejected": 0.051055438816547394, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.050215208034433e-07, |
|
"logits/chosen": -4.096522331237793, |
|
"logits/rejected": -4.07404088973999, |
|
"logps/chosen": -597.645751953125, |
|
"logps/rejected": -389.2298889160156, |
|
"loss": 0.566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.43959060311317444, |
|
"rewards/margins": 0.4319628179073334, |
|
"rewards/rejected": 0.007627798710018396, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.035868005738881e-07, |
|
"logits/chosen": -4.101964950561523, |
|
"logits/rejected": -3.971134901046753, |
|
"logps/chosen": -654.4503784179688, |
|
"logps/rejected": -446.3114318847656, |
|
"loss": 0.5856, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5462326407432556, |
|
"rewards/margins": 0.52418452501297, |
|
"rewards/rejected": 0.022048136219382286, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.0215208034433283e-07, |
|
"logits/chosen": -4.149927139282227, |
|
"logits/rejected": -4.159340858459473, |
|
"logps/chosen": -569.7572631835938, |
|
"logps/rejected": -407.02545166015625, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.25654059648513794, |
|
"rewards/margins": 0.33243244886398315, |
|
"rewards/rejected": -0.0758919045329094, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.007173601147776e-07, |
|
"logits/chosen": -4.003470420837402, |
|
"logits/rejected": -3.9572086334228516, |
|
"logps/chosen": -565.874267578125, |
|
"logps/rejected": -392.24761962890625, |
|
"loss": 0.6221, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3559120297431946, |
|
"rewards/margins": 0.3540252149105072, |
|
"rewards/rejected": 0.0018868416082113981, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.992826398852224e-07, |
|
"logits/chosen": -4.156329154968262, |
|
"logits/rejected": -4.075765132904053, |
|
"logps/chosen": -503.8531188964844, |
|
"logps/rejected": -443.68731689453125, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3798424005508423, |
|
"rewards/margins": 0.3366612493991852, |
|
"rewards/rejected": 0.04318114370107651, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 3.978479196556671e-07, |
|
"logits/chosen": -4.099778652191162, |
|
"logits/rejected": -4.040897846221924, |
|
"logps/chosen": -482.4664001464844, |
|
"logps/rejected": -434.3218688964844, |
|
"loss": 0.6225, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.2912788987159729, |
|
"rewards/margins": 0.3869735598564148, |
|
"rewards/rejected": -0.09569470584392548, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_logits/chosen": -3.998389720916748, |
|
"eval_logits/rejected": -4.024014949798584, |
|
"eval_logps/chosen": -545.3189086914062, |
|
"eval_logps/rejected": -437.8950500488281, |
|
"eval_loss": 0.5851995944976807, |
|
"eval_rewards/accuracies": 0.6679999828338623, |
|
"eval_rewards/chosen": 0.40576791763305664, |
|
"eval_rewards/margins": 0.3947778642177582, |
|
"eval_rewards/rejected": 0.01099009346216917, |
|
"eval_runtime": 145.9401, |
|
"eval_samples_per_second": 13.704, |
|
"eval_steps_per_second": 1.713, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.964131994261119e-07, |
|
"logits/chosen": -4.306766986846924, |
|
"logits/rejected": -4.230467796325684, |
|
"logps/chosen": -549.1437377929688, |
|
"logps/rejected": -444.1548767089844, |
|
"loss": 0.5957, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4746015965938568, |
|
"rewards/margins": 0.5222647786140442, |
|
"rewards/rejected": -0.047663114964962006, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9497847919655664e-07, |
|
"logits/chosen": -4.1666669845581055, |
|
"logits/rejected": -4.193212509155273, |
|
"logps/chosen": -506.9803161621094, |
|
"logps/rejected": -432.77783203125, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3090699315071106, |
|
"rewards/margins": 0.2045580893754959, |
|
"rewards/rejected": 0.10451184213161469, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9354375896700144e-07, |
|
"logits/chosen": -3.8870487213134766, |
|
"logits/rejected": -3.9582340717315674, |
|
"logps/chosen": -616.6710815429688, |
|
"logps/rejected": -508.39813232421875, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.48951154947280884, |
|
"rewards/margins": 0.4596976637840271, |
|
"rewards/rejected": 0.02981388568878174, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 3.9210903873744614e-07, |
|
"logits/chosen": -3.9056262969970703, |
|
"logits/rejected": -3.7358765602111816, |
|
"logps/chosen": -550.6695556640625, |
|
"logps/rejected": -412.7212829589844, |
|
"loss": 0.5673, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.3575005829334259, |
|
"rewards/margins": 0.3841975927352905, |
|
"rewards/rejected": -0.02669701538980007, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.9067431850789094e-07, |
|
"logits/chosen": -3.873683452606201, |
|
"logits/rejected": -3.945786714553833, |
|
"logps/chosen": -598.2088623046875, |
|
"logps/rejected": -395.6291198730469, |
|
"loss": 0.6215, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.40675464272499084, |
|
"rewards/margins": 0.4558509886264801, |
|
"rewards/rejected": -0.04909630864858627, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.892395982783357e-07, |
|
"logits/chosen": -3.874563217163086, |
|
"logits/rejected": -3.988626480102539, |
|
"logps/chosen": -580.8389282226562, |
|
"logps/rejected": -464.979248046875, |
|
"loss": 0.5563, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4360577464103699, |
|
"rewards/margins": 0.436431884765625, |
|
"rewards/rejected": -0.0003741338732652366, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.878048780487805e-07, |
|
"logits/chosen": -3.8558075428009033, |
|
"logits/rejected": -3.862384080886841, |
|
"logps/chosen": -603.0067138671875, |
|
"logps/rejected": -453.36126708984375, |
|
"loss": 0.5751, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5401335954666138, |
|
"rewards/margins": 0.4825173020362854, |
|
"rewards/rejected": 0.05761627480387688, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.863701578192252e-07, |
|
"logits/chosen": -4.10439395904541, |
|
"logits/rejected": -4.0709943771362305, |
|
"logps/chosen": -562.9437255859375, |
|
"logps/rejected": -468.41046142578125, |
|
"loss": 0.5989, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5524980425834656, |
|
"rewards/margins": 0.43577200174331665, |
|
"rewards/rejected": 0.1167261153459549, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8493543758967e-07, |
|
"logits/chosen": -3.9623591899871826, |
|
"logits/rejected": -3.9735617637634277, |
|
"logps/chosen": -496.47479248046875, |
|
"logps/rejected": -354.4454040527344, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.2600507140159607, |
|
"rewards/margins": 0.2917958199977875, |
|
"rewards/rejected": -0.031745124608278275, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8350071736011475e-07, |
|
"logits/chosen": -3.906859874725342, |
|
"logits/rejected": -3.9087185859680176, |
|
"logps/chosen": -427.0773010253906, |
|
"logps/rejected": -349.86077880859375, |
|
"loss": 0.6289, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2505320608615875, |
|
"rewards/margins": 0.1847679316997528, |
|
"rewards/rejected": 0.06576415151357651, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_logits/chosen": -3.9994406700134277, |
|
"eval_logits/rejected": -4.025309085845947, |
|
"eval_logps/chosen": -545.2498168945312, |
|
"eval_logps/rejected": -437.9264831542969, |
|
"eval_loss": 0.5823842287063599, |
|
"eval_rewards/accuracies": 0.6669999957084656, |
|
"eval_rewards/chosen": 0.41267773509025574, |
|
"eval_rewards/margins": 0.4048316776752472, |
|
"eval_rewards/rejected": 0.007846098393201828, |
|
"eval_runtime": 147.2172, |
|
"eval_samples_per_second": 13.585, |
|
"eval_steps_per_second": 1.698, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 3.8206599713055956e-07, |
|
"logits/chosen": -4.042483329772949, |
|
"logits/rejected": -3.88130259513855, |
|
"logps/chosen": -579.4273681640625, |
|
"logps/rejected": -485.6758728027344, |
|
"loss": 0.6176, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.34652236104011536, |
|
"rewards/margins": 0.2972865104675293, |
|
"rewards/rejected": 0.049235861748456955, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.8063127690100426e-07, |
|
"logits/chosen": -4.063638210296631, |
|
"logits/rejected": -4.126063346862793, |
|
"logps/chosen": -599.88916015625, |
|
"logps/rejected": -484.5962829589844, |
|
"loss": 0.5938, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.32978740334510803, |
|
"rewards/margins": 0.2084900587797165, |
|
"rewards/rejected": 0.12129731476306915, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.7919655667144906e-07, |
|
"logits/chosen": -3.99627947807312, |
|
"logits/rejected": -3.940380573272705, |
|
"logps/chosen": -546.5948486328125, |
|
"logps/rejected": -412.3846740722656, |
|
"loss": 0.5958, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.44469523429870605, |
|
"rewards/margins": 0.48819655179977417, |
|
"rewards/rejected": -0.0435013584792614, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.777618364418938e-07, |
|
"logits/chosen": -3.9554569721221924, |
|
"logits/rejected": -3.9587948322296143, |
|
"logps/chosen": -467.00677490234375, |
|
"logps/rejected": -398.10955810546875, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.2916651964187622, |
|
"rewards/margins": 0.2515104413032532, |
|
"rewards/rejected": 0.040154773741960526, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 3.763271162123386e-07, |
|
"logits/chosen": -3.7756049633026123, |
|
"logits/rejected": -3.755903720855713, |
|
"logps/chosen": -591.2271728515625, |
|
"logps/rejected": -444.3221130371094, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4893193244934082, |
|
"rewards/margins": 0.5091069936752319, |
|
"rewards/rejected": -0.019787678495049477, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.748923959827833e-07, |
|
"logits/chosen": -4.159283638000488, |
|
"logits/rejected": -4.039699077606201, |
|
"logps/chosen": -449.0978088378906, |
|
"logps/rejected": -326.54791259765625, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2927771806716919, |
|
"rewards/margins": 0.25214409828186035, |
|
"rewards/rejected": 0.040633104741573334, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.734576757532281e-07, |
|
"logits/chosen": -4.083529472351074, |
|
"logits/rejected": -4.100892543792725, |
|
"logps/chosen": -642.364501953125, |
|
"logps/rejected": -476.1664123535156, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.5070067644119263, |
|
"rewards/margins": 0.6494277715682983, |
|
"rewards/rejected": -0.1424209624528885, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.7202295552367287e-07, |
|
"logits/chosen": -4.093569755554199, |
|
"logits/rejected": -4.279056549072266, |
|
"logps/chosen": -589.33642578125, |
|
"logps/rejected": -452.7460021972656, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5026015639305115, |
|
"rewards/margins": 0.4538155198097229, |
|
"rewards/rejected": 0.048786066472530365, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 3.705882352941176e-07, |
|
"logits/chosen": -4.209478855133057, |
|
"logits/rejected": -4.320340633392334, |
|
"logps/chosen": -601.5377197265625, |
|
"logps/rejected": -405.8938293457031, |
|
"loss": 0.5319, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5548459887504578, |
|
"rewards/margins": 0.6283925771713257, |
|
"rewards/rejected": -0.07354650646448135, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6915351506456237e-07, |
|
"logits/chosen": -4.303310871124268, |
|
"logits/rejected": -4.392244338989258, |
|
"logps/chosen": -526.3382568359375, |
|
"logps/rejected": -336.311279296875, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2645590901374817, |
|
"rewards/margins": 0.33163318037986755, |
|
"rewards/rejected": -0.06707411259412766, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_logits/chosen": -3.9953150749206543, |
|
"eval_logits/rejected": -4.021241188049316, |
|
"eval_logps/chosen": -545.1544189453125, |
|
"eval_logps/rejected": -437.90802001953125, |
|
"eval_loss": 0.5818018913269043, |
|
"eval_rewards/accuracies": 0.6679999828338623, |
|
"eval_rewards/chosen": 0.42221859097480774, |
|
"eval_rewards/margins": 0.41252991557121277, |
|
"eval_rewards/rejected": 0.009688721038401127, |
|
"eval_runtime": 146.2307, |
|
"eval_samples_per_second": 13.677, |
|
"eval_steps_per_second": 1.71, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.677187948350072e-07, |
|
"logits/chosen": -4.2785139083862305, |
|
"logits/rejected": -4.281913757324219, |
|
"logps/chosen": -631.8258056640625, |
|
"logps/rejected": -432.3312072753906, |
|
"loss": 0.5513, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4580743908882141, |
|
"rewards/margins": 0.5590990781784058, |
|
"rewards/rejected": -0.10102470219135284, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.6628407460545193e-07, |
|
"logits/chosen": -4.18049955368042, |
|
"logits/rejected": -4.1783599853515625, |
|
"logps/chosen": -482.9546813964844, |
|
"logps/rejected": -441.1940002441406, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4040532112121582, |
|
"rewards/margins": 0.2903508245944977, |
|
"rewards/rejected": 0.11370239406824112, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 3.648493543758967e-07, |
|
"logits/chosen": -4.047448635101318, |
|
"logits/rejected": -4.031399726867676, |
|
"logps/chosen": -513.3343505859375, |
|
"logps/rejected": -439.59857177734375, |
|
"loss": 0.5949, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.5155481696128845, |
|
"rewards/margins": 0.5262617468833923, |
|
"rewards/rejected": -0.01071359496563673, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6341463414634143e-07, |
|
"logits/chosen": -4.256237983703613, |
|
"logits/rejected": -4.140265941619873, |
|
"logps/chosen": -586.2674560546875, |
|
"logps/rejected": -513.4707641601562, |
|
"loss": 0.6051, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4771571159362793, |
|
"rewards/margins": 0.3665952682495117, |
|
"rewards/rejected": 0.11056187003850937, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.6197991391678623e-07, |
|
"logits/chosen": -4.259045600891113, |
|
"logits/rejected": -4.169145584106445, |
|
"logps/chosen": -492.68572998046875, |
|
"logps/rejected": -336.1100769042969, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3294292390346527, |
|
"rewards/margins": 0.42583298683166504, |
|
"rewards/rejected": -0.09640369564294815, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.60545193687231e-07, |
|
"logits/chosen": -4.042055130004883, |
|
"logits/rejected": -4.034060478210449, |
|
"logps/chosen": -437.0550231933594, |
|
"logps/rejected": -344.05828857421875, |
|
"loss": 0.5862, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3367193341255188, |
|
"rewards/margins": 0.3423077464103699, |
|
"rewards/rejected": -0.0055884262546896935, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 3.5911047345767574e-07, |
|
"logits/chosen": -3.9995014667510986, |
|
"logits/rejected": -4.026850700378418, |
|
"logps/chosen": -576.7128295898438, |
|
"logps/rejected": -466.55010986328125, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4717404246330261, |
|
"rewards/margins": 0.5018633604049683, |
|
"rewards/rejected": -0.030122917145490646, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.576757532281205e-07, |
|
"logits/chosen": -3.9444518089294434, |
|
"logits/rejected": -3.91229510307312, |
|
"logps/chosen": -586.0652465820312, |
|
"logps/rejected": -464.03271484375, |
|
"loss": 0.5891, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3900233507156372, |
|
"rewards/margins": 0.42400288581848145, |
|
"rewards/rejected": -0.03397948667407036, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.562410329985653e-07, |
|
"logits/chosen": -3.970731735229492, |
|
"logits/rejected": -4.1245927810668945, |
|
"logps/chosen": -567.556884765625, |
|
"logps/rejected": -479.55841064453125, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.331743061542511, |
|
"rewards/margins": 0.251522034406662, |
|
"rewards/rejected": 0.08022100478410721, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.5480631276901004e-07, |
|
"logits/chosen": -4.056004524230957, |
|
"logits/rejected": -4.051678657531738, |
|
"logps/chosen": -536.676513671875, |
|
"logps/rejected": -385.55767822265625, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.28222841024398804, |
|
"rewards/margins": 0.38999465107917786, |
|
"rewards/rejected": -0.10776624828577042, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_logits/chosen": -4.006156921386719, |
|
"eval_logits/rejected": -4.033264636993408, |
|
"eval_logps/chosen": -545.279052734375, |
|
"eval_logps/rejected": -438.14556884765625, |
|
"eval_loss": 0.5797023773193359, |
|
"eval_rewards/accuracies": 0.6729999780654907, |
|
"eval_rewards/chosen": 0.409759521484375, |
|
"eval_rewards/margins": 0.42382344603538513, |
|
"eval_rewards/rejected": -0.014063959941267967, |
|
"eval_runtime": 147.9054, |
|
"eval_samples_per_second": 13.522, |
|
"eval_steps_per_second": 1.69, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 3.533715925394548e-07, |
|
"logits/chosen": -3.957362413406372, |
|
"logits/rejected": -3.8286430835723877, |
|
"logps/chosen": -518.3594970703125, |
|
"logps/rejected": -345.4431457519531, |
|
"loss": 0.5558, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4510959982872009, |
|
"rewards/margins": 0.5645402669906616, |
|
"rewards/rejected": -0.11344428360462189, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5193687230989955e-07, |
|
"logits/chosen": -4.1850199699401855, |
|
"logits/rejected": -4.076201915740967, |
|
"logps/chosen": -611.8565673828125, |
|
"logps/rejected": -568.882080078125, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4028521478176117, |
|
"rewards/margins": 0.12559688091278076, |
|
"rewards/rejected": 0.2772553265094757, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.5050215208034435e-07, |
|
"logits/chosen": -4.432595252990723, |
|
"logits/rejected": -4.352065086364746, |
|
"logps/chosen": -611.6333618164062, |
|
"logps/rejected": -451.0577697753906, |
|
"loss": 0.5624, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.40066614747047424, |
|
"rewards/margins": 0.4248287081718445, |
|
"rewards/rejected": -0.024162566289305687, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4906743185078905e-07, |
|
"logits/chosen": -4.167354106903076, |
|
"logits/rejected": -4.119304656982422, |
|
"logps/chosen": -597.4181518554688, |
|
"logps/rejected": -468.30126953125, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5246739983558655, |
|
"rewards/margins": 0.6675662994384766, |
|
"rewards/rejected": -0.1428922414779663, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.4763271162123385e-07, |
|
"logits/chosen": -3.9422059059143066, |
|
"logits/rejected": -4.009974002838135, |
|
"logps/chosen": -456.62103271484375, |
|
"logps/rejected": -456.80462646484375, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.46443605422973633, |
|
"rewards/margins": 0.3656379282474518, |
|
"rewards/rejected": 0.09879810363054276, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.461979913916786e-07, |
|
"logits/chosen": -4.015919208526611, |
|
"logits/rejected": -4.085513114929199, |
|
"logps/chosen": -490.6377868652344, |
|
"logps/rejected": -374.94866943359375, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.44695836305618286, |
|
"rewards/margins": 0.5198525190353394, |
|
"rewards/rejected": -0.07289411872625351, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.447632711621234e-07, |
|
"logits/chosen": -4.100437164306641, |
|
"logits/rejected": -4.218926906585693, |
|
"logps/chosen": -555.1058349609375, |
|
"logps/rejected": -427.87139892578125, |
|
"loss": 0.6011, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.3257507383823395, |
|
"rewards/margins": 0.39021044969558716, |
|
"rewards/rejected": -0.06445976346731186, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.433285509325681e-07, |
|
"logits/chosen": -3.8781065940856934, |
|
"logits/rejected": -3.8362109661102295, |
|
"logps/chosen": -423.455078125, |
|
"logps/rejected": -366.01617431640625, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.23626752197742462, |
|
"rewards/margins": 0.28217631578445435, |
|
"rewards/rejected": -0.04590878635644913, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.418938307030129e-07, |
|
"logits/chosen": -4.141830921173096, |
|
"logits/rejected": -4.139374256134033, |
|
"logps/chosen": -490.91473388671875, |
|
"logps/rejected": -431.1683654785156, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3411320149898529, |
|
"rewards/margins": 0.3740822374820709, |
|
"rewards/rejected": -0.032950229942798615, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.4045911047345766e-07, |
|
"logits/chosen": -4.246241569519043, |
|
"logits/rejected": -4.1093244552612305, |
|
"logps/chosen": -595.197509765625, |
|
"logps/rejected": -455.29656982421875, |
|
"loss": 0.5659, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.3736713230609894, |
|
"rewards/margins": 0.5091021060943604, |
|
"rewards/rejected": -0.13543078303337097, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_logits/chosen": -3.996328353881836, |
|
"eval_logits/rejected": -4.024491786956787, |
|
"eval_logps/chosen": -545.1725463867188, |
|
"eval_logps/rejected": -438.1591491699219, |
|
"eval_loss": 0.5790306925773621, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": 0.4204104244709015, |
|
"eval_rewards/margins": 0.43583211302757263, |
|
"eval_rewards/rejected": -0.015421712771058083, |
|
"eval_runtime": 145.84, |
|
"eval_samples_per_second": 13.714, |
|
"eval_steps_per_second": 1.714, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3902439024390247e-07, |
|
"logits/chosen": -3.910076141357422, |
|
"logits/rejected": -4.025428295135498, |
|
"logps/chosen": -489.580322265625, |
|
"logps/rejected": -334.91131591796875, |
|
"loss": 0.5546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.26078709959983826, |
|
"rewards/margins": 0.4329034686088562, |
|
"rewards/rejected": -0.17211636900901794, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.3758967001434716e-07, |
|
"logits/chosen": -4.3428544998168945, |
|
"logits/rejected": -4.32183837890625, |
|
"logps/chosen": -733.9967041015625, |
|
"logps/rejected": -545.9852905273438, |
|
"loss": 0.5303, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6733923554420471, |
|
"rewards/margins": 0.6076704263687134, |
|
"rewards/rejected": 0.06572196632623672, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.3615494978479197e-07, |
|
"logits/chosen": -4.124747276306152, |
|
"logits/rejected": -4.166211128234863, |
|
"logps/chosen": -608.829345703125, |
|
"logps/rejected": -383.62518310546875, |
|
"loss": 0.5576, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.39822494983673096, |
|
"rewards/margins": 0.5036035776138306, |
|
"rewards/rejected": -0.1053786501288414, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.347202295552367e-07, |
|
"logits/chosen": -4.033061981201172, |
|
"logits/rejected": -4.068852424621582, |
|
"logps/chosen": -511.05682373046875, |
|
"logps/rejected": -465.43475341796875, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.35540980100631714, |
|
"rewards/margins": 0.31782034039497375, |
|
"rewards/rejected": 0.03758946806192398, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.332855093256815e-07, |
|
"logits/chosen": -4.0989203453063965, |
|
"logits/rejected": -4.1430792808532715, |
|
"logps/chosen": -612.7939453125, |
|
"logps/rejected": -483.2908630371094, |
|
"loss": 0.5781, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.567456841468811, |
|
"rewards/margins": 0.6063565611839294, |
|
"rewards/rejected": -0.03889976069331169, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 3.318507890961262e-07, |
|
"logits/chosen": -4.184214115142822, |
|
"logits/rejected": -4.299299716949463, |
|
"logps/chosen": -540.6097412109375, |
|
"logps/rejected": -396.2356872558594, |
|
"loss": 0.5515, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.401826947927475, |
|
"rewards/margins": 0.48910683393478394, |
|
"rewards/rejected": -0.08727996051311493, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.3041606886657103e-07, |
|
"logits/chosen": -3.9090118408203125, |
|
"logits/rejected": -3.9316658973693848, |
|
"logps/chosen": -574.2691650390625, |
|
"logps/rejected": -485.7064514160156, |
|
"loss": 0.674, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4122130274772644, |
|
"rewards/margins": 0.2545969486236572, |
|
"rewards/rejected": 0.1576160490512848, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.289813486370158e-07, |
|
"logits/chosen": -4.144872665405273, |
|
"logits/rejected": -4.0689921379089355, |
|
"logps/chosen": -513.11181640625, |
|
"logps/rejected": -471.02606201171875, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.34228435158729553, |
|
"rewards/margins": 0.4427550733089447, |
|
"rewards/rejected": -0.10047070682048798, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.275466284074606e-07, |
|
"logits/chosen": -3.9695823192596436, |
|
"logits/rejected": -4.070342063903809, |
|
"logps/chosen": -653.9967651367188, |
|
"logps/rejected": -471.898193359375, |
|
"loss": 0.5594, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.57194983959198, |
|
"rewards/margins": 0.6099370121955872, |
|
"rewards/rejected": -0.03798716515302658, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.261119081779053e-07, |
|
"logits/chosen": -4.089110851287842, |
|
"logits/rejected": -4.0619401931762695, |
|
"logps/chosen": -515.8906860351562, |
|
"logps/rejected": -470.29541015625, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.42164483666419983, |
|
"rewards/margins": 0.3623473346233368, |
|
"rewards/rejected": 0.05929745361208916, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_logits/chosen": -3.990658760070801, |
|
"eval_logits/rejected": -4.0185322761535645, |
|
"eval_logps/chosen": -545.216064453125, |
|
"eval_logps/rejected": -438.2904052734375, |
|
"eval_loss": 0.5782522559165955, |
|
"eval_rewards/accuracies": 0.671999990940094, |
|
"eval_rewards/chosen": 0.41605862975120544, |
|
"eval_rewards/margins": 0.4446040093898773, |
|
"eval_rewards/rejected": -0.02854539081454277, |
|
"eval_runtime": 147.5337, |
|
"eval_samples_per_second": 13.556, |
|
"eval_steps_per_second": 1.695, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.246771879483501e-07, |
|
"logits/chosen": -3.944901704788208, |
|
"logits/rejected": -3.9903030395507812, |
|
"logps/chosen": -475.91363525390625, |
|
"logps/rejected": -396.0389099121094, |
|
"loss": 0.5996, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2757423520088196, |
|
"rewards/margins": 0.3251574635505676, |
|
"rewards/rejected": -0.049415141344070435, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2324246771879484e-07, |
|
"logits/chosen": -4.066908359527588, |
|
"logits/rejected": -3.8957467079162598, |
|
"logps/chosen": -538.4827880859375, |
|
"logps/rejected": -386.1225280761719, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.40634027123451233, |
|
"rewards/margins": 0.44673413038253784, |
|
"rewards/rejected": -0.04039386659860611, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2180774748923953e-07, |
|
"logits/chosen": -4.119419097900391, |
|
"logits/rejected": -3.880350112915039, |
|
"logps/chosen": -571.5280151367188, |
|
"logps/rejected": -467.98321533203125, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3607470393180847, |
|
"rewards/margins": 0.40588730573654175, |
|
"rewards/rejected": -0.04514027386903763, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.2037302725968434e-07, |
|
"logits/chosen": -3.995079517364502, |
|
"logits/rejected": -3.9660801887512207, |
|
"logps/chosen": -528.3262939453125, |
|
"logps/rejected": -391.5002746582031, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3550952970981598, |
|
"rewards/margins": 0.4048345685005188, |
|
"rewards/rejected": -0.049739234149456024, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.189383070301291e-07, |
|
"logits/chosen": -4.0606889724731445, |
|
"logits/rejected": -4.020025253295898, |
|
"logps/chosen": -606.38330078125, |
|
"logps/rejected": -492.71759033203125, |
|
"loss": 0.5607, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.392439067363739, |
|
"rewards/margins": 0.29129353165626526, |
|
"rewards/rejected": 0.10114555060863495, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.175035868005739e-07, |
|
"logits/chosen": -4.366388320922852, |
|
"logits/rejected": -4.3169779777526855, |
|
"logps/chosen": -572.692626953125, |
|
"logps/rejected": -431.1947326660156, |
|
"loss": 0.6249, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.4136829972267151, |
|
"rewards/margins": 0.31379351019859314, |
|
"rewards/rejected": 0.09988941252231598, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.160688665710186e-07, |
|
"logits/chosen": -4.261553764343262, |
|
"logits/rejected": -4.20203971862793, |
|
"logps/chosen": -548.4271240234375, |
|
"logps/rejected": -461.83563232421875, |
|
"loss": 0.5295, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4961729943752289, |
|
"rewards/margins": 0.5303093194961548, |
|
"rewards/rejected": -0.034136295318603516, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.146341463414634e-07, |
|
"logits/chosen": -4.172554016113281, |
|
"logits/rejected": -4.170234680175781, |
|
"logps/chosen": -538.4212036132812, |
|
"logps/rejected": -511.212890625, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.45014914870262146, |
|
"rewards/margins": 0.44471946358680725, |
|
"rewards/rejected": 0.005429693963378668, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1319942611190815e-07, |
|
"logits/chosen": -3.915037155151367, |
|
"logits/rejected": -3.8585174083709717, |
|
"logps/chosen": -497.04229736328125, |
|
"logps/rejected": -471.8094787597656, |
|
"loss": 0.5919, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2910478711128235, |
|
"rewards/margins": 0.37008222937583923, |
|
"rewards/rejected": -0.07903440296649933, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1176470588235295e-07, |
|
"logits/chosen": -3.864201307296753, |
|
"logits/rejected": -3.8585095405578613, |
|
"logps/chosen": -542.01953125, |
|
"logps/rejected": -397.53424072265625, |
|
"loss": 0.5999, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.17917154729366302, |
|
"rewards/margins": 0.28133153915405273, |
|
"rewards/rejected": -0.10215996205806732, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -3.99351167678833, |
|
"eval_logits/rejected": -4.020653247833252, |
|
"eval_logps/chosen": -545.3095092773438, |
|
"eval_logps/rejected": -438.4728698730469, |
|
"eval_loss": 0.5767195820808411, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.40671002864837646, |
|
"eval_rewards/margins": 0.4535037875175476, |
|
"eval_rewards/rejected": -0.04679381474852562, |
|
"eval_runtime": 147.2862, |
|
"eval_samples_per_second": 13.579, |
|
"eval_steps_per_second": 1.697, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.1032998565279765e-07, |
|
"logits/chosen": -4.243551254272461, |
|
"logits/rejected": -4.064631938934326, |
|
"logps/chosen": -478.11187744140625, |
|
"logps/rejected": -458.78692626953125, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.40352755784988403, |
|
"rewards/margins": 0.4834938645362854, |
|
"rewards/rejected": -0.07996630668640137, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.0889526542324245e-07, |
|
"logits/chosen": -4.1683268547058105, |
|
"logits/rejected": -4.173158645629883, |
|
"logps/chosen": -652.5173950195312, |
|
"logps/rejected": -432.58428955078125, |
|
"loss": 0.5737, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4788149297237396, |
|
"rewards/margins": 0.5799158811569214, |
|
"rewards/rejected": -0.10110093653202057, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.074605451936872e-07, |
|
"logits/chosen": -4.137356758117676, |
|
"logits/rejected": -4.176325798034668, |
|
"logps/chosen": -576.1214599609375, |
|
"logps/rejected": -380.2808837890625, |
|
"loss": 0.5699, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4571780562400818, |
|
"rewards/margins": 0.49380144476890564, |
|
"rewards/rejected": -0.03662336990237236, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.06025824964132e-07, |
|
"logits/chosen": -4.188223838806152, |
|
"logits/rejected": -4.05302095413208, |
|
"logps/chosen": -480.8373107910156, |
|
"logps/rejected": -422.5328063964844, |
|
"loss": 0.5799, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.2358378916978836, |
|
"rewards/margins": 0.28218406438827515, |
|
"rewards/rejected": -0.04634615033864975, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.045911047345767e-07, |
|
"logits/chosen": -4.110243797302246, |
|
"logits/rejected": -4.0695366859436035, |
|
"logps/chosen": -615.70263671875, |
|
"logps/rejected": -426.46075439453125, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.5083123445510864, |
|
"rewards/margins": 0.7217694520950317, |
|
"rewards/rejected": -0.2134571522474289, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.031563845050215e-07, |
|
"logits/chosen": -4.203267574310303, |
|
"logits/rejected": -4.161170482635498, |
|
"logps/chosen": -590.3410034179688, |
|
"logps/rejected": -447.26715087890625, |
|
"loss": 0.5185, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5431427955627441, |
|
"rewards/margins": 0.68207848072052, |
|
"rewards/rejected": -0.13893572986125946, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.0172166427546626e-07, |
|
"logits/chosen": -4.285967826843262, |
|
"logits/rejected": -4.167950630187988, |
|
"logps/chosen": -533.8848876953125, |
|
"logps/rejected": -413.30975341796875, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.39957305788993835, |
|
"rewards/margins": 0.3795527517795563, |
|
"rewards/rejected": 0.020020361989736557, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.00286944045911e-07, |
|
"logits/chosen": -4.027644634246826, |
|
"logits/rejected": -3.9792587757110596, |
|
"logps/chosen": -626.9630737304688, |
|
"logps/rejected": -397.4438781738281, |
|
"loss": 0.6014, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.45451897382736206, |
|
"rewards/margins": 0.5369467735290527, |
|
"rewards/rejected": -0.08242778480052948, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9885222381635577e-07, |
|
"logits/chosen": -4.1345133781433105, |
|
"logits/rejected": -4.244950771331787, |
|
"logps/chosen": -562.5131225585938, |
|
"logps/rejected": -422.6846618652344, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.40697044134140015, |
|
"rewards/margins": 0.48475074768066406, |
|
"rewards/rejected": -0.07778030633926392, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.9741750358680057e-07, |
|
"logits/chosen": -4.032704830169678, |
|
"logits/rejected": -3.9772307872772217, |
|
"logps/chosen": -568.47802734375, |
|
"logps/rejected": -502.3460998535156, |
|
"loss": 0.6004, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.46621161699295044, |
|
"rewards/margins": 0.4852283000946045, |
|
"rewards/rejected": -0.019016731530427933, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_logits/chosen": -3.9943645000457764, |
|
"eval_logits/rejected": -4.0218825340271, |
|
"eval_logps/chosen": -545.1437377929688, |
|
"eval_logps/rejected": -438.3991394042969, |
|
"eval_loss": 0.5730865597724915, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": 0.4232881963253021, |
|
"eval_rewards/margins": 0.46270594000816345, |
|
"eval_rewards/rejected": -0.03941771015524864, |
|
"eval_runtime": 148.86, |
|
"eval_samples_per_second": 13.435, |
|
"eval_steps_per_second": 1.679, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.959827833572453e-07, |
|
"logits/chosen": -4.1379075050354, |
|
"logits/rejected": -4.1423420906066895, |
|
"logps/chosen": -620.6439819335938, |
|
"logps/rejected": -438.18084716796875, |
|
"loss": 0.5651, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5286334753036499, |
|
"rewards/margins": 0.5047623515129089, |
|
"rewards/rejected": 0.02387116476893425, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.9454806312769007e-07, |
|
"logits/chosen": -4.126761436462402, |
|
"logits/rejected": -4.265500545501709, |
|
"logps/chosen": -494.80206298828125, |
|
"logps/rejected": -427.181640625, |
|
"loss": 0.6087, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4608178734779358, |
|
"rewards/margins": 0.36958831548690796, |
|
"rewards/rejected": 0.09122952073812485, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.931133428981348e-07, |
|
"logits/chosen": -4.138312339782715, |
|
"logits/rejected": -4.2697319984436035, |
|
"logps/chosen": -492.8348693847656, |
|
"logps/rejected": -405.1728515625, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.21532472968101501, |
|
"rewards/margins": 0.19911542534828186, |
|
"rewards/rejected": 0.016209278255701065, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.9167862266857963e-07, |
|
"logits/chosen": -4.26310396194458, |
|
"logits/rejected": -4.242154121398926, |
|
"logps/chosen": -562.9186401367188, |
|
"logps/rejected": -377.67303466796875, |
|
"loss": 0.5497, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5041324496269226, |
|
"rewards/margins": 0.5989420413970947, |
|
"rewards/rejected": -0.09480961412191391, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.902439024390244e-07, |
|
"logits/chosen": -4.286158561706543, |
|
"logits/rejected": -4.289405345916748, |
|
"logps/chosen": -607.9891357421875, |
|
"logps/rejected": -496.7867126464844, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5594509840011597, |
|
"rewards/margins": 0.6072807908058167, |
|
"rewards/rejected": -0.04782974720001221, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.8880918220946913e-07, |
|
"logits/chosen": -4.170632839202881, |
|
"logits/rejected": -4.215968132019043, |
|
"logps/chosen": -445.17559814453125, |
|
"logps/rejected": -355.8191223144531, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.31880825757980347, |
|
"rewards/margins": 0.2609938383102417, |
|
"rewards/rejected": 0.05781441926956177, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.873744619799139e-07, |
|
"logits/chosen": -4.265324592590332, |
|
"logits/rejected": -4.255076885223389, |
|
"logps/chosen": -590.837158203125, |
|
"logps/rejected": -441.911376953125, |
|
"loss": 0.6081, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.33519288897514343, |
|
"rewards/margins": 0.3991120457649231, |
|
"rewards/rejected": -0.06391920149326324, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.859397417503587e-07, |
|
"logits/chosen": -4.415879249572754, |
|
"logits/rejected": -4.314742565155029, |
|
"logps/chosen": -501.2169494628906, |
|
"logps/rejected": -451.86553955078125, |
|
"loss": 0.6035, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.33651891350746155, |
|
"rewards/margins": 0.30277958512306213, |
|
"rewards/rejected": 0.03373932093381882, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8450502152080344e-07, |
|
"logits/chosen": -3.878053665161133, |
|
"logits/rejected": -4.040474891662598, |
|
"logps/chosen": -647.9942626953125, |
|
"logps/rejected": -437.6617736816406, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5844290852546692, |
|
"rewards/margins": 0.661879301071167, |
|
"rewards/rejected": -0.07745026051998138, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.830703012912482e-07, |
|
"logits/chosen": -4.195162296295166, |
|
"logits/rejected": -4.2288408279418945, |
|
"logps/chosen": -590.7817993164062, |
|
"logps/rejected": -428.28387451171875, |
|
"loss": 0.5349, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.510196328163147, |
|
"rewards/margins": 0.6029139757156372, |
|
"rewards/rejected": -0.09271766245365143, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_logits/chosen": -4.0012006759643555, |
|
"eval_logits/rejected": -4.029512405395508, |
|
"eval_logps/chosen": -545.0914306640625, |
|
"eval_logps/rejected": -438.4334716796875, |
|
"eval_loss": 0.5719799995422363, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": 0.4285166561603546, |
|
"eval_rewards/margins": 0.4713680148124695, |
|
"eval_rewards/rejected": -0.04285132512450218, |
|
"eval_runtime": 148.6253, |
|
"eval_samples_per_second": 13.457, |
|
"eval_steps_per_second": 1.682, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.8163558106169294e-07, |
|
"logits/chosen": -4.198761940002441, |
|
"logits/rejected": -4.1362786293029785, |
|
"logps/chosen": -616.3384399414062, |
|
"logps/rejected": -427.9803771972656, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5358282327651978, |
|
"rewards/margins": 0.5532472729682922, |
|
"rewards/rejected": -0.017419060692191124, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.8020086083213774e-07, |
|
"logits/chosen": -4.3343377113342285, |
|
"logits/rejected": -4.233187198638916, |
|
"logps/chosen": -663.6807861328125, |
|
"logps/rejected": -496.68121337890625, |
|
"loss": 0.5346, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5831555128097534, |
|
"rewards/margins": 0.7528368830680847, |
|
"rewards/rejected": -0.16968131065368652, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7876614060258244e-07, |
|
"logits/chosen": -4.208827018737793, |
|
"logits/rejected": -4.191887378692627, |
|
"logps/chosen": -546.9085693359375, |
|
"logps/rejected": -454.7889099121094, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4015926420688629, |
|
"rewards/margins": 0.4776438772678375, |
|
"rewards/rejected": -0.0760512501001358, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.7733142037302725e-07, |
|
"logits/chosen": -3.8217597007751465, |
|
"logits/rejected": -3.925053119659424, |
|
"logps/chosen": -661.263916015625, |
|
"logps/rejected": -534.646728515625, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.4541807770729065, |
|
"rewards/margins": 0.5835798382759094, |
|
"rewards/rejected": -0.12939909100532532, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.75896700143472e-07, |
|
"logits/chosen": -4.182621955871582, |
|
"logits/rejected": -3.9824492931365967, |
|
"logps/chosen": -570.587890625, |
|
"logps/rejected": -394.5463562011719, |
|
"loss": 0.5491, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5437583923339844, |
|
"rewards/margins": 0.5803283452987671, |
|
"rewards/rejected": -0.03656994178891182, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.744619799139168e-07, |
|
"logits/chosen": -4.0817694664001465, |
|
"logits/rejected": -4.021645545959473, |
|
"logps/chosen": -562.7216796875, |
|
"logps/rejected": -408.1803894042969, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.38751038908958435, |
|
"rewards/margins": 0.412889301776886, |
|
"rewards/rejected": -0.025378871709108353, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.730272596843615e-07, |
|
"logits/chosen": -4.210979461669922, |
|
"logits/rejected": -4.233429908752441, |
|
"logps/chosen": -531.6992797851562, |
|
"logps/rejected": -415.34130859375, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.4554630219936371, |
|
"rewards/margins": 0.6501585841178894, |
|
"rewards/rejected": -0.1946956068277359, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.715925394548063e-07, |
|
"logits/chosen": -3.991922378540039, |
|
"logits/rejected": -3.861186981201172, |
|
"logps/chosen": -516.5054931640625, |
|
"logps/rejected": -480.88043212890625, |
|
"loss": 0.6105, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.35484379529953003, |
|
"rewards/margins": 0.21209315955638885, |
|
"rewards/rejected": 0.14275071024894714, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.7015781922525106e-07, |
|
"logits/chosen": -3.956188678741455, |
|
"logits/rejected": -3.9556357860565186, |
|
"logps/chosen": -464.2613830566406, |
|
"logps/rejected": -448.0741271972656, |
|
"loss": 0.6277, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.34133443236351013, |
|
"rewards/margins": 0.3067266345024109, |
|
"rewards/rejected": 0.03460781276226044, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6872309899569586e-07, |
|
"logits/chosen": -4.143117427825928, |
|
"logits/rejected": -4.186631679534912, |
|
"logps/chosen": -578.9446411132812, |
|
"logps/rejected": -439.56658935546875, |
|
"loss": 0.5377, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.5403174161911011, |
|
"rewards/margins": 0.633220911026001, |
|
"rewards/rejected": -0.0929035171866417, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -4.000906944274902, |
|
"eval_logits/rejected": -4.028975009918213, |
|
"eval_logps/chosen": -545.1220092773438, |
|
"eval_logps/rejected": -438.54486083984375, |
|
"eval_loss": 0.5702030062675476, |
|
"eval_rewards/accuracies": 0.6850000023841858, |
|
"eval_rewards/chosen": 0.4254603683948517, |
|
"eval_rewards/margins": 0.4794518053531647, |
|
"eval_rewards/rejected": -0.0539914108812809, |
|
"eval_runtime": 147.8823, |
|
"eval_samples_per_second": 13.524, |
|
"eval_steps_per_second": 1.691, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6728837876614056e-07, |
|
"logits/chosen": -4.1301422119140625, |
|
"logits/rejected": -4.1415839195251465, |
|
"logps/chosen": -582.57666015625, |
|
"logps/rejected": -450.591552734375, |
|
"loss": 0.5582, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5017611384391785, |
|
"rewards/margins": 0.6365527510643005, |
|
"rewards/rejected": -0.13479158282279968, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.6585365853658536e-07, |
|
"logits/chosen": -4.098201274871826, |
|
"logits/rejected": -4.06491756439209, |
|
"logps/chosen": -536.2640380859375, |
|
"logps/rejected": -417.99481201171875, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4373628497123718, |
|
"rewards/margins": 0.393180251121521, |
|
"rewards/rejected": 0.044182561337947845, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.644189383070301e-07, |
|
"logits/chosen": -4.560007572174072, |
|
"logits/rejected": -4.380262851715088, |
|
"logps/chosen": -560.2337646484375, |
|
"logps/rejected": -399.133056640625, |
|
"loss": 0.5672, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5478136539459229, |
|
"rewards/margins": 0.6600695252418518, |
|
"rewards/rejected": -0.11225590854883194, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.629842180774749e-07, |
|
"logits/chosen": -4.084300518035889, |
|
"logits/rejected": -4.194989204406738, |
|
"logps/chosen": -615.1845703125, |
|
"logps/rejected": -399.0810546875, |
|
"loss": 0.5302, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5594775676727295, |
|
"rewards/margins": 0.8519641160964966, |
|
"rewards/rejected": -0.2924865782260895, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.615494978479196e-07, |
|
"logits/chosen": -3.948491334915161, |
|
"logits/rejected": -3.790837049484253, |
|
"logps/chosen": -506.5896911621094, |
|
"logps/rejected": -390.9329528808594, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.3724905848503113, |
|
"rewards/margins": 0.4565068781375885, |
|
"rewards/rejected": -0.08401624858379364, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.601147776183644e-07, |
|
"logits/chosen": -4.237751007080078, |
|
"logits/rejected": -4.1675705909729, |
|
"logps/chosen": -569.1129150390625, |
|
"logps/rejected": -415.8509826660156, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3635835647583008, |
|
"rewards/margins": 0.4130166471004486, |
|
"rewards/rejected": -0.04943311959505081, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.5868005738880917e-07, |
|
"logits/chosen": -4.343452453613281, |
|
"logits/rejected": -4.299803733825684, |
|
"logps/chosen": -468.18768310546875, |
|
"logps/rejected": -420.81256103515625, |
|
"loss": 0.5592, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4336473047733307, |
|
"rewards/margins": 0.4215630888938904, |
|
"rewards/rejected": 0.012084214016795158, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.57245337159254e-07, |
|
"logits/chosen": -4.12381649017334, |
|
"logits/rejected": -4.066357135772705, |
|
"logps/chosen": -512.6734619140625, |
|
"logps/rejected": -421.00872802734375, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.26631009578704834, |
|
"rewards/margins": 0.49776148796081543, |
|
"rewards/rejected": -0.23145142197608948, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.558106169296987e-07, |
|
"logits/chosen": -4.055663108825684, |
|
"logits/rejected": -4.202220439910889, |
|
"logps/chosen": -531.7757568359375, |
|
"logps/rejected": -427.21051025390625, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.35120105743408203, |
|
"rewards/margins": 0.329306036233902, |
|
"rewards/rejected": 0.021895062178373337, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.543758967001435e-07, |
|
"logits/chosen": -3.896604537963867, |
|
"logits/rejected": -3.8615658283233643, |
|
"logps/chosen": -546.3208618164062, |
|
"logps/rejected": -435.53143310546875, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.46676820516586304, |
|
"rewards/margins": 0.7485499382019043, |
|
"rewards/rejected": -0.28178170323371887, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_logits/chosen": -4.003889560699463, |
|
"eval_logits/rejected": -4.031704425811768, |
|
"eval_logps/chosen": -545.0299072265625, |
|
"eval_logps/rejected": -438.5533142089844, |
|
"eval_loss": 0.5712563395500183, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.43467363715171814, |
|
"eval_rewards/margins": 0.4895067512989044, |
|
"eval_rewards/rejected": -0.05483310669660568, |
|
"eval_runtime": 148.9309, |
|
"eval_samples_per_second": 13.429, |
|
"eval_steps_per_second": 1.679, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.5294117647058823e-07, |
|
"logits/chosen": -4.0738677978515625, |
|
"logits/rejected": -4.02095890045166, |
|
"logps/chosen": -547.7879638671875, |
|
"logps/rejected": -461.1922302246094, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4428838789463043, |
|
"rewards/margins": 0.4759696424007416, |
|
"rewards/rejected": -0.03308583423495293, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.51506456241033e-07, |
|
"logits/chosen": -4.027615547180176, |
|
"logits/rejected": -4.137267112731934, |
|
"logps/chosen": -519.4240112304688, |
|
"logps/rejected": -418.49884033203125, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.33291319012641907, |
|
"rewards/margins": 0.5208204388618469, |
|
"rewards/rejected": -0.18790724873542786, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5007173601147773e-07, |
|
"logits/chosen": -3.9522101879119873, |
|
"logits/rejected": -4.056872367858887, |
|
"logps/chosen": -581.5064697265625, |
|
"logps/rejected": -583.0844116210938, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.40307894349098206, |
|
"rewards/margins": 0.49982690811157227, |
|
"rewards/rejected": -0.09674793481826782, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.486370157819225e-07, |
|
"logits/chosen": -4.075150966644287, |
|
"logits/rejected": -3.9781277179718018, |
|
"logps/chosen": -570.3604736328125, |
|
"logps/rejected": -457.1639099121094, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4488401412963867, |
|
"rewards/margins": 0.4194963574409485, |
|
"rewards/rejected": 0.029343824833631516, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.472022955523673e-07, |
|
"logits/chosen": -3.9273715019226074, |
|
"logits/rejected": -4.03403377532959, |
|
"logps/chosen": -576.674072265625, |
|
"logps/rejected": -480.82330322265625, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4813673496246338, |
|
"rewards/margins": 0.5193904638290405, |
|
"rewards/rejected": -0.038023076951503754, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4576757532281204e-07, |
|
"logits/chosen": -4.104135513305664, |
|
"logits/rejected": -4.141896724700928, |
|
"logps/chosen": -570.6798095703125, |
|
"logps/rejected": -480.5628967285156, |
|
"loss": 0.6344, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4069296419620514, |
|
"rewards/margins": 0.38408637046813965, |
|
"rewards/rejected": 0.02284328266978264, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.443328550932568e-07, |
|
"logits/chosen": -3.959376811981201, |
|
"logits/rejected": -3.8872084617614746, |
|
"logps/chosen": -552.5087280273438, |
|
"logps/rejected": -436.44189453125, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.29828330874443054, |
|
"rewards/margins": 0.3948608338832855, |
|
"rewards/rejected": -0.09657756984233856, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4289813486370154e-07, |
|
"logits/chosen": -4.081685543060303, |
|
"logits/rejected": -4.045130729675293, |
|
"logps/chosen": -545.3685302734375, |
|
"logps/rejected": -373.71600341796875, |
|
"loss": 0.5179, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.462582528591156, |
|
"rewards/margins": 0.6623843908309937, |
|
"rewards/rejected": -0.19980189204216003, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4146341463414635e-07, |
|
"logits/chosen": -4.286005973815918, |
|
"logits/rejected": -4.341670036315918, |
|
"logps/chosen": -546.548828125, |
|
"logps/rejected": -416.674072265625, |
|
"loss": 0.5689, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4305512011051178, |
|
"rewards/margins": 0.6499841213226318, |
|
"rewards/rejected": -0.21943287551403046, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.400286944045911e-07, |
|
"logits/chosen": -3.9777417182922363, |
|
"logits/rejected": -3.9910645484924316, |
|
"logps/chosen": -470.6476135253906, |
|
"logps/rejected": -453.6451721191406, |
|
"loss": 0.6093, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.38502687215805054, |
|
"rewards/margins": 0.30599719285964966, |
|
"rewards/rejected": 0.07902970165014267, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_logits/chosen": -4.001364231109619, |
|
"eval_logits/rejected": -4.028832912445068, |
|
"eval_logps/chosen": -544.912841796875, |
|
"eval_logps/rejected": -438.4606628417969, |
|
"eval_loss": 0.5706081986427307, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": 0.44638243317604065, |
|
"eval_rewards/margins": 0.49195748567581177, |
|
"eval_rewards/rejected": -0.045575033873319626, |
|
"eval_runtime": 146.1996, |
|
"eval_samples_per_second": 13.68, |
|
"eval_steps_per_second": 1.71, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3859397417503585e-07, |
|
"logits/chosen": -4.030927658081055, |
|
"logits/rejected": -3.9580256938934326, |
|
"logps/chosen": -514.30712890625, |
|
"logps/rejected": -354.2815246582031, |
|
"loss": 0.5861, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4988827705383301, |
|
"rewards/margins": 0.5217560529708862, |
|
"rewards/rejected": -0.0228732917457819, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3715925394548063e-07, |
|
"logits/chosen": -4.007624626159668, |
|
"logits/rejected": -4.2475457191467285, |
|
"logps/chosen": -661.9398193359375, |
|
"logps/rejected": -411.38848876953125, |
|
"loss": 0.538, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.663671612739563, |
|
"rewards/margins": 0.7176098227500916, |
|
"rewards/rejected": -0.05393817275762558, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3572453371592538e-07, |
|
"logits/chosen": -4.090206146240234, |
|
"logits/rejected": -4.1433539390563965, |
|
"logps/chosen": -504.1729431152344, |
|
"logps/rejected": -387.82623291015625, |
|
"loss": 0.5351, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.45734721422195435, |
|
"rewards/margins": 0.4615212082862854, |
|
"rewards/rejected": -0.0041740150190889835, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3428981348637013e-07, |
|
"logits/chosen": -4.289696216583252, |
|
"logits/rejected": -4.263758659362793, |
|
"logps/chosen": -579.8231201171875, |
|
"logps/rejected": -404.7850646972656, |
|
"loss": 0.5307, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6702554225921631, |
|
"rewards/margins": 0.7904798984527588, |
|
"rewards/rejected": -0.12022446095943451, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.328550932568149e-07, |
|
"logits/chosen": -3.835402727127075, |
|
"logits/rejected": -3.8511269092559814, |
|
"logps/chosen": -510.6192932128906, |
|
"logps/rejected": -446.6246643066406, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.38664764165878296, |
|
"rewards/margins": 0.3122571110725403, |
|
"rewards/rejected": 0.07439050823450089, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.3142037302725966e-07, |
|
"logits/chosen": -3.915778398513794, |
|
"logits/rejected": -3.8879055976867676, |
|
"logps/chosen": -513.9568481445312, |
|
"logps/rejected": -410.12310791015625, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4804447293281555, |
|
"rewards/margins": 0.5258998274803162, |
|
"rewards/rejected": -0.04545507952570915, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2998565279770444e-07, |
|
"logits/chosen": -4.199291229248047, |
|
"logits/rejected": -4.164752006530762, |
|
"logps/chosen": -644.1868896484375, |
|
"logps/rejected": -394.4111022949219, |
|
"loss": 0.603, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.38507136702537537, |
|
"rewards/margins": 0.504477858543396, |
|
"rewards/rejected": -0.11940644681453705, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.285509325681492e-07, |
|
"logits/chosen": -4.0966033935546875, |
|
"logits/rejected": -4.1276421546936035, |
|
"logps/chosen": -525.5519409179688, |
|
"logps/rejected": -452.3783264160156, |
|
"loss": 0.5688, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.3376069664955139, |
|
"rewards/margins": 0.48656487464904785, |
|
"rewards/rejected": -0.14895787835121155, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2711621233859396e-07, |
|
"logits/chosen": -3.9321861267089844, |
|
"logits/rejected": -3.8997440338134766, |
|
"logps/chosen": -524.1985473632812, |
|
"logps/rejected": -368.1291198730469, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.33177611231803894, |
|
"rewards/margins": 0.49390387535095215, |
|
"rewards/rejected": -0.1621277630329132, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.2568149210903872e-07, |
|
"logits/chosen": -4.461883544921875, |
|
"logits/rejected": -4.530648708343506, |
|
"logps/chosen": -619.8561401367188, |
|
"logps/rejected": -468.6748046875, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5222944021224976, |
|
"rewards/margins": 0.6014097929000854, |
|
"rewards/rejected": -0.07911545038223267, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": -3.998574733734131, |
|
"eval_logits/rejected": -4.025696277618408, |
|
"eval_logps/chosen": -544.8922119140625, |
|
"eval_logps/rejected": -438.4912109375, |
|
"eval_loss": 0.5689104199409485, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": 0.448445200920105, |
|
"eval_rewards/margins": 0.49707192182540894, |
|
"eval_rewards/rejected": -0.04862673580646515, |
|
"eval_runtime": 148.9177, |
|
"eval_samples_per_second": 13.43, |
|
"eval_steps_per_second": 1.679, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.242467718794835e-07, |
|
"logits/chosen": -3.9245476722717285, |
|
"logits/rejected": -4.00443696975708, |
|
"logps/chosen": -561.0607299804688, |
|
"logps/rejected": -444.2076110839844, |
|
"loss": 0.542, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.41927021741867065, |
|
"rewards/margins": 0.5216065049171448, |
|
"rewards/rejected": -0.10233630239963531, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2281205164992824e-07, |
|
"logits/chosen": -4.177279949188232, |
|
"logits/rejected": -4.077963352203369, |
|
"logps/chosen": -504.2713928222656, |
|
"logps/rejected": -444.5032653808594, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.43868088722229004, |
|
"rewards/margins": 0.4138507843017578, |
|
"rewards/rejected": 0.024830086156725883, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.2137733142037302e-07, |
|
"logits/chosen": -3.990638017654419, |
|
"logits/rejected": -4.014552593231201, |
|
"logps/chosen": -549.037353515625, |
|
"logps/rejected": -442.3534240722656, |
|
"loss": 0.5213, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5215023756027222, |
|
"rewards/margins": 0.638957679271698, |
|
"rewards/rejected": -0.11745530366897583, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.1994261119081777e-07, |
|
"logits/chosen": -4.201764106750488, |
|
"logits/rejected": -4.19627046585083, |
|
"logps/chosen": -564.8765258789062, |
|
"logps/rejected": -433.4271545410156, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4676602780818939, |
|
"rewards/margins": 0.5004759430885315, |
|
"rewards/rejected": -0.03281565010547638, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1850789096126255e-07, |
|
"logits/chosen": -3.911675214767456, |
|
"logits/rejected": -4.010054588317871, |
|
"logps/chosen": -611.3627319335938, |
|
"logps/rejected": -452.5043029785156, |
|
"loss": 0.663, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.40320587158203125, |
|
"rewards/margins": 0.3427828252315521, |
|
"rewards/rejected": 0.060423027724027634, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.170731707317073e-07, |
|
"logits/chosen": -4.167824745178223, |
|
"logits/rejected": -4.243043422698975, |
|
"logps/chosen": -556.890625, |
|
"logps/rejected": -397.8431091308594, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.4790073037147522, |
|
"rewards/margins": 0.5861107110977173, |
|
"rewards/rejected": -0.10710340738296509, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1563845050215208e-07, |
|
"logits/chosen": -4.110980033874512, |
|
"logits/rejected": -4.188474655151367, |
|
"logps/chosen": -569.0153198242188, |
|
"logps/rejected": -404.994384765625, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4905467927455902, |
|
"rewards/margins": 0.5199242830276489, |
|
"rewards/rejected": -0.029377540573477745, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.1420373027259683e-07, |
|
"logits/chosen": -4.269859313964844, |
|
"logits/rejected": -4.332370758056641, |
|
"logps/chosen": -543.8313598632812, |
|
"logps/rejected": -437.123046875, |
|
"loss": 0.5571, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.500403106212616, |
|
"rewards/margins": 0.7072377800941467, |
|
"rewards/rejected": -0.20683467388153076, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.127690100430416e-07, |
|
"logits/chosen": -3.926335096359253, |
|
"logits/rejected": -3.9738330841064453, |
|
"logps/chosen": -533.0458984375, |
|
"logps/rejected": -424.850341796875, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.35639142990112305, |
|
"rewards/margins": 0.4018617570400238, |
|
"rewards/rejected": -0.04547032713890076, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.1133428981348636e-07, |
|
"logits/chosen": -3.779186248779297, |
|
"logits/rejected": -3.8913798332214355, |
|
"logps/chosen": -617.508056640625, |
|
"logps/rejected": -492.209228515625, |
|
"loss": 0.5753, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5383332371711731, |
|
"rewards/margins": 0.467332661151886, |
|
"rewards/rejected": 0.07100055366754532, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_logits/chosen": -3.9845926761627197, |
|
"eval_logits/rejected": -4.009966850280762, |
|
"eval_logps/chosen": -544.7802124023438, |
|
"eval_logps/rejected": -438.44573974609375, |
|
"eval_loss": 0.5681360960006714, |
|
"eval_rewards/accuracies": 0.6850000023841858, |
|
"eval_rewards/chosen": 0.4596436619758606, |
|
"eval_rewards/margins": 0.5037252306938171, |
|
"eval_rewards/rejected": -0.04408155009150505, |
|
"eval_runtime": 148.5709, |
|
"eval_samples_per_second": 13.462, |
|
"eval_steps_per_second": 1.683, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.098995695839311e-07, |
|
"logits/chosen": -3.978921890258789, |
|
"logits/rejected": -3.8923873901367188, |
|
"logps/chosen": -556.697998046875, |
|
"logps/rejected": -416.08184814453125, |
|
"loss": 0.534, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.4480956494808197, |
|
"rewards/margins": 0.6271561980247498, |
|
"rewards/rejected": -0.17906051874160767, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.084648493543759e-07, |
|
"logits/chosen": -4.281157493591309, |
|
"logits/rejected": -4.271050453186035, |
|
"logps/chosen": -673.8267211914062, |
|
"logps/rejected": -463.4944763183594, |
|
"loss": 0.5461, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5151551365852356, |
|
"rewards/margins": 0.6002731919288635, |
|
"rewards/rejected": -0.08511805534362793, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0703012912482064e-07, |
|
"logits/chosen": -3.901240110397339, |
|
"logits/rejected": -3.862910509109497, |
|
"logps/chosen": -591.2846069335938, |
|
"logps/rejected": -389.8904113769531, |
|
"loss": 0.6189, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2968345284461975, |
|
"rewards/margins": 0.3309480547904968, |
|
"rewards/rejected": -0.03411349281668663, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0559540889526542e-07, |
|
"logits/chosen": -4.235989570617676, |
|
"logits/rejected": -4.060244560241699, |
|
"logps/chosen": -597.9428100585938, |
|
"logps/rejected": -404.6048889160156, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.46942558884620667, |
|
"rewards/margins": 0.5440183877944946, |
|
"rewards/rejected": -0.07459276914596558, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.0416068866571017e-07, |
|
"logits/chosen": -3.8396244049072266, |
|
"logits/rejected": -3.752044677734375, |
|
"logps/chosen": -601.25341796875, |
|
"logps/rejected": -435.07647705078125, |
|
"loss": 0.5917, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.49751096963882446, |
|
"rewards/margins": 0.46320000290870667, |
|
"rewards/rejected": 0.03431097790598869, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.0272596843615495e-07, |
|
"logits/chosen": -4.197469711303711, |
|
"logits/rejected": -4.122381687164307, |
|
"logps/chosen": -553.6739501953125, |
|
"logps/rejected": -420.4598083496094, |
|
"loss": 0.5932, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.453556627035141, |
|
"rewards/margins": 0.4589596390724182, |
|
"rewards/rejected": -0.005403043236583471, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.012912482065997e-07, |
|
"logits/chosen": -4.135566711425781, |
|
"logits/rejected": -4.087862968444824, |
|
"logps/chosen": -528.1041259765625, |
|
"logps/rejected": -432.2552795410156, |
|
"loss": 0.5474, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5243477821350098, |
|
"rewards/margins": 0.636970043182373, |
|
"rewards/rejected": -0.11262223869562149, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9985652797704448e-07, |
|
"logits/chosen": -3.9298617839813232, |
|
"logits/rejected": -3.9982573986053467, |
|
"logps/chosen": -466.2574157714844, |
|
"logps/rejected": -393.4660339355469, |
|
"loss": 0.5623, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.469001442193985, |
|
"rewards/margins": 0.48559433221817017, |
|
"rewards/rejected": -0.016592923551797867, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.9842180774748923e-07, |
|
"logits/chosen": -3.9439053535461426, |
|
"logits/rejected": -3.8986332416534424, |
|
"logps/chosen": -559.919921875, |
|
"logps/rejected": -430.98162841796875, |
|
"loss": 0.5399, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4386838376522064, |
|
"rewards/margins": 0.4418070912361145, |
|
"rewards/rejected": -0.003123197006061673, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.96987087517934e-07, |
|
"logits/chosen": -4.274647235870361, |
|
"logits/rejected": -4.253532409667969, |
|
"logps/chosen": -593.2935791015625, |
|
"logps/rejected": -445.554931640625, |
|
"loss": 0.5709, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4176342487335205, |
|
"rewards/margins": 0.4632183909416199, |
|
"rewards/rejected": -0.04558416083455086, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_logits/chosen": -3.984861135482788, |
|
"eval_logits/rejected": -4.0099897384643555, |
|
"eval_logps/chosen": -544.6834716796875, |
|
"eval_logps/rejected": -438.3924255371094, |
|
"eval_loss": 0.5672796368598938, |
|
"eval_rewards/accuracies": 0.6909999847412109, |
|
"eval_rewards/chosen": 0.4693204462528229, |
|
"eval_rewards/margins": 0.5080692172050476, |
|
"eval_rewards/rejected": -0.03874876722693443, |
|
"eval_runtime": 146.2016, |
|
"eval_samples_per_second": 13.68, |
|
"eval_steps_per_second": 1.71, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9555236728837876e-07, |
|
"logits/chosen": -4.1506547927856445, |
|
"logits/rejected": -4.121700286865234, |
|
"logps/chosen": -559.0662231445312, |
|
"logps/rejected": -428.6475524902344, |
|
"loss": 0.5692, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5197519659996033, |
|
"rewards/margins": 0.5856004953384399, |
|
"rewards/rejected": -0.06584848463535309, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9411764705882353e-07, |
|
"logits/chosen": -4.2824015617370605, |
|
"logits/rejected": -4.196056365966797, |
|
"logps/chosen": -598.8436279296875, |
|
"logps/rejected": -525.2330322265625, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4597892165184021, |
|
"rewards/margins": 0.5178920030593872, |
|
"rewards/rejected": -0.05810274928808212, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.9268292682926829e-07, |
|
"logits/chosen": -4.085073947906494, |
|
"logits/rejected": -4.154143810272217, |
|
"logps/chosen": -669.5693359375, |
|
"logps/rejected": -449.7344665527344, |
|
"loss": 0.5496, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6506977081298828, |
|
"rewards/margins": 0.7707425355911255, |
|
"rewards/rejected": -0.12004482746124268, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.9124820659971306e-07, |
|
"logits/chosen": -3.931575059890747, |
|
"logits/rejected": -3.969634532928467, |
|
"logps/chosen": -670.7431640625, |
|
"logps/rejected": -442.46405029296875, |
|
"loss": 0.5956, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.47314882278442383, |
|
"rewards/margins": 0.5320797562599182, |
|
"rewards/rejected": -0.058930885046720505, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8981348637015781e-07, |
|
"logits/chosen": -3.7605667114257812, |
|
"logits/rejected": -3.7463626861572266, |
|
"logps/chosen": -507.4091796875, |
|
"logps/rejected": -419.73919677734375, |
|
"loss": 0.6149, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4424256384372711, |
|
"rewards/margins": 0.36436089873313904, |
|
"rewards/rejected": 0.07806471735239029, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.883787661406026e-07, |
|
"logits/chosen": -3.9714431762695312, |
|
"logits/rejected": -4.080648899078369, |
|
"logps/chosen": -589.9671630859375, |
|
"logps/rejected": -402.8089904785156, |
|
"loss": 0.5552, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5086973309516907, |
|
"rewards/margins": 0.7329601645469666, |
|
"rewards/rejected": -0.2242628037929535, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.8694404591104734e-07, |
|
"logits/chosen": -4.056425094604492, |
|
"logits/rejected": -4.178628444671631, |
|
"logps/chosen": -524.011962890625, |
|
"logps/rejected": -415.20233154296875, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.41316717863082886, |
|
"rewards/margins": 0.47009795904159546, |
|
"rewards/rejected": -0.056930772960186005, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.855093256814921e-07, |
|
"logits/chosen": -4.216281890869141, |
|
"logits/rejected": -4.076776027679443, |
|
"logps/chosen": -544.1904296875, |
|
"logps/rejected": -466.5315856933594, |
|
"loss": 0.575, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.41413965821266174, |
|
"rewards/margins": 0.48670220375061035, |
|
"rewards/rejected": -0.07256259769201279, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8407460545193687e-07, |
|
"logits/chosen": -4.047214984893799, |
|
"logits/rejected": -4.0597639083862305, |
|
"logps/chosen": -594.7711181640625, |
|
"logps/rejected": -453.0791015625, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.6000150442123413, |
|
"rewards/margins": 0.62732994556427, |
|
"rewards/rejected": -0.027314912527799606, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.8263988522238162e-07, |
|
"logits/chosen": -3.8675410747528076, |
|
"logits/rejected": -3.881988525390625, |
|
"logps/chosen": -478.98443603515625, |
|
"logps/rejected": -408.0418395996094, |
|
"loss": 0.5565, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.34187614917755127, |
|
"rewards/margins": 0.5348206162452698, |
|
"rewards/rejected": -0.19294443726539612, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -3.984271764755249, |
|
"eval_logits/rejected": -4.009637832641602, |
|
"eval_logps/chosen": -544.6849975585938, |
|
"eval_logps/rejected": -438.4054260253906, |
|
"eval_loss": 0.5665393471717834, |
|
"eval_rewards/accuracies": 0.6819999814033508, |
|
"eval_rewards/chosen": 0.469163715839386, |
|
"eval_rewards/margins": 0.5092154145240784, |
|
"eval_rewards/rejected": -0.04005170986056328, |
|
"eval_runtime": 145.7676, |
|
"eval_samples_per_second": 13.72, |
|
"eval_steps_per_second": 1.715, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.812051649928264e-07, |
|
"logits/chosen": -3.83473539352417, |
|
"logits/rejected": -3.9073386192321777, |
|
"logps/chosen": -604.2052612304688, |
|
"logps/rejected": -456.9849548339844, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5973328351974487, |
|
"rewards/margins": 0.4590074121952057, |
|
"rewards/rejected": 0.13832543790340424, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7977044476327115e-07, |
|
"logits/chosen": -3.848345994949341, |
|
"logits/rejected": -3.786773681640625, |
|
"logps/chosen": -502.00653076171875, |
|
"logps/rejected": -401.30877685546875, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4152609407901764, |
|
"rewards/margins": 0.3713647425174713, |
|
"rewards/rejected": 0.04389624670147896, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7833572453371593e-07, |
|
"logits/chosen": -4.126666069030762, |
|
"logits/rejected": -4.045652389526367, |
|
"logps/chosen": -495.5149841308594, |
|
"logps/rejected": -428.750244140625, |
|
"loss": 0.5802, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4449082911014557, |
|
"rewards/margins": 0.48476019501686096, |
|
"rewards/rejected": -0.03985190391540527, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7690100430416068e-07, |
|
"logits/chosen": -4.006113529205322, |
|
"logits/rejected": -4.05719518661499, |
|
"logps/chosen": -564.3095092773438, |
|
"logps/rejected": -465.112060546875, |
|
"loss": 0.5478, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4402576982975006, |
|
"rewards/margins": 0.5477269887924194, |
|
"rewards/rejected": -0.10746929794549942, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.7546628407460546e-07, |
|
"logits/chosen": -4.016690254211426, |
|
"logits/rejected": -4.137378692626953, |
|
"logps/chosen": -540.1869506835938, |
|
"logps/rejected": -384.2567443847656, |
|
"loss": 0.5053, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.46003857254981995, |
|
"rewards/margins": 0.587317168712616, |
|
"rewards/rejected": -0.1272786259651184, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.740315638450502e-07, |
|
"logits/chosen": -4.14896297454834, |
|
"logits/rejected": -4.022231101989746, |
|
"logps/chosen": -551.8126831054688, |
|
"logps/rejected": -429.8963317871094, |
|
"loss": 0.553, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5274641513824463, |
|
"rewards/margins": 0.4932268559932709, |
|
"rewards/rejected": 0.034237295389175415, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.72596843615495e-07, |
|
"logits/chosen": -3.9441299438476562, |
|
"logits/rejected": -3.7229416370391846, |
|
"logps/chosen": -541.1564331054688, |
|
"logps/rejected": -522.1112060546875, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.36212533712387085, |
|
"rewards/margins": 0.2563532590866089, |
|
"rewards/rejected": 0.10577203333377838, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.7116212338593974e-07, |
|
"logits/chosen": -4.236396312713623, |
|
"logits/rejected": -4.220719337463379, |
|
"logps/chosen": -498.52874755859375, |
|
"logps/rejected": -399.10699462890625, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4771362245082855, |
|
"rewards/margins": 0.3947621285915375, |
|
"rewards/rejected": 0.08237410336732864, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.6972740315638452e-07, |
|
"logits/chosen": -4.176735877990723, |
|
"logits/rejected": -3.9685966968536377, |
|
"logps/chosen": -628.45947265625, |
|
"logps/rejected": -414.69329833984375, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.5496100783348083, |
|
"rewards/margins": 0.7379701733589172, |
|
"rewards/rejected": -0.1883600354194641, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6829268292682927e-07, |
|
"logits/chosen": -4.031551361083984, |
|
"logits/rejected": -4.172730445861816, |
|
"logps/chosen": -528.1746215820312, |
|
"logps/rejected": -456.8675231933594, |
|
"loss": 0.585, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5029736757278442, |
|
"rewards/margins": 0.41340795159339905, |
|
"rewards/rejected": 0.08956580609083176, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_logits/chosen": -3.981973171234131, |
|
"eval_logits/rejected": -4.00735330581665, |
|
"eval_logps/chosen": -544.59619140625, |
|
"eval_logps/rejected": -438.3558044433594, |
|
"eval_loss": 0.5649946331977844, |
|
"eval_rewards/accuracies": 0.6940000057220459, |
|
"eval_rewards/chosen": 0.47803932428359985, |
|
"eval_rewards/margins": 0.5131266713142395, |
|
"eval_rewards/rejected": -0.03508726879954338, |
|
"eval_runtime": 146.3353, |
|
"eval_samples_per_second": 13.667, |
|
"eval_steps_per_second": 1.708, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6685796269727405e-07, |
|
"logits/chosen": -4.223569393157959, |
|
"logits/rejected": -4.211024284362793, |
|
"logps/chosen": -586.1395874023438, |
|
"logps/rejected": -488.5260314941406, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5072857141494751, |
|
"rewards/margins": 0.45038923621177673, |
|
"rewards/rejected": 0.05689648538827896, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.654232424677188e-07, |
|
"logits/chosen": -4.059657573699951, |
|
"logits/rejected": -4.050175666809082, |
|
"logps/chosen": -636.249755859375, |
|
"logps/rejected": -445.1454162597656, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.6326101422309875, |
|
"rewards/margins": 0.6772100925445557, |
|
"rewards/rejected": -0.04459994286298752, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.6398852223816355e-07, |
|
"logits/chosen": -3.965324878692627, |
|
"logits/rejected": -3.852470874786377, |
|
"logps/chosen": -587.070556640625, |
|
"logps/rejected": -468.05755615234375, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.6876250505447388, |
|
"rewards/margins": 0.6847888827323914, |
|
"rewards/rejected": 0.0028361976146698, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6255380200860833e-07, |
|
"logits/chosen": -3.9872021675109863, |
|
"logits/rejected": -4.126004695892334, |
|
"logps/chosen": -575.1105346679688, |
|
"logps/rejected": -469.9864807128906, |
|
"loss": 0.6969, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.39717382192611694, |
|
"rewards/margins": 0.3177093267440796, |
|
"rewards/rejected": 0.07946449518203735, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.6111908177905308e-07, |
|
"logits/chosen": -4.012079238891602, |
|
"logits/rejected": -3.936004161834717, |
|
"logps/chosen": -597.6954345703125, |
|
"logps/rejected": -411.5677795410156, |
|
"loss": 0.6023, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4575771391391754, |
|
"rewards/margins": 0.4063941538333893, |
|
"rewards/rejected": 0.05118294805288315, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5968436154949786e-07, |
|
"logits/chosen": -4.271051406860352, |
|
"logits/rejected": -4.011579990386963, |
|
"logps/chosen": -563.8511962890625, |
|
"logps/rejected": -387.9336853027344, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.46211299300193787, |
|
"rewards/margins": 0.5439087748527527, |
|
"rewards/rejected": -0.0817958191037178, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.582496413199426e-07, |
|
"logits/chosen": -4.078734397888184, |
|
"logits/rejected": -4.14528751373291, |
|
"logps/chosen": -748.7713623046875, |
|
"logps/rejected": -491.18206787109375, |
|
"loss": 0.5499, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.6352272033691406, |
|
"rewards/margins": 0.7783417701721191, |
|
"rewards/rejected": -0.14311453700065613, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.5681492109038739e-07, |
|
"logits/chosen": -4.074445724487305, |
|
"logits/rejected": -3.9905147552490234, |
|
"logps/chosen": -471.805908203125, |
|
"logps/rejected": -419.62109375, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5179725885391235, |
|
"rewards/margins": 0.5387195348739624, |
|
"rewards/rejected": -0.020746838301420212, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.553802008608321e-07, |
|
"logits/chosen": -4.071807384490967, |
|
"logits/rejected": -4.146918296813965, |
|
"logps/chosen": -561.8287353515625, |
|
"logps/rejected": -447.42034912109375, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5403738021850586, |
|
"rewards/margins": 0.5378284454345703, |
|
"rewards/rejected": 0.0025453567504882812, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1.539454806312769e-07, |
|
"logits/chosen": -3.9499289989471436, |
|
"logits/rejected": -3.7599105834960938, |
|
"logps/chosen": -573.884765625, |
|
"logps/rejected": -480.6022033691406, |
|
"loss": 0.5883, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.5067580938339233, |
|
"rewards/margins": 0.46255749464035034, |
|
"rewards/rejected": 0.044200599193573, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -3.966898202896118, |
|
"eval_logits/rejected": -3.9893743991851807, |
|
"eval_logps/chosen": -544.46240234375, |
|
"eval_logps/rejected": -438.1562194824219, |
|
"eval_loss": 0.5670157074928284, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": 0.4914305508136749, |
|
"eval_rewards/margins": 0.5065579414367676, |
|
"eval_rewards/rejected": -0.015127355232834816, |
|
"eval_runtime": 146.024, |
|
"eval_samples_per_second": 13.696, |
|
"eval_steps_per_second": 1.712, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5251076040172164e-07, |
|
"logits/chosen": -4.044391632080078, |
|
"logits/rejected": -4.076410293579102, |
|
"logps/chosen": -598.0468139648438, |
|
"logps/rejected": -476.67181396484375, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5654221773147583, |
|
"rewards/margins": 0.4769902229309082, |
|
"rewards/rejected": 0.0884319394826889, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.5107604017216642e-07, |
|
"logits/chosen": -3.93943452835083, |
|
"logits/rejected": -4.029221534729004, |
|
"logps/chosen": -531.6763916015625, |
|
"logps/rejected": -357.01910400390625, |
|
"loss": 0.5708, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5427955389022827, |
|
"rewards/margins": 0.6645749807357788, |
|
"rewards/rejected": -0.12177946418523788, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4964131994261117e-07, |
|
"logits/chosen": -4.1592698097229, |
|
"logits/rejected": -4.196699142456055, |
|
"logps/chosen": -555.0905151367188, |
|
"logps/rejected": -400.6699523925781, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.5090584754943848, |
|
"rewards/margins": 0.5514262318611145, |
|
"rewards/rejected": -0.04236777871847153, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.4820659971305595e-07, |
|
"logits/chosen": -3.996324062347412, |
|
"logits/rejected": -3.8942997455596924, |
|
"logps/chosen": -559.6539916992188, |
|
"logps/rejected": -462.087890625, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.48809123039245605, |
|
"rewards/margins": 0.41656923294067383, |
|
"rewards/rejected": 0.07152204215526581, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.467718794835007e-07, |
|
"logits/chosen": -3.6908695697784424, |
|
"logits/rejected": -3.810857057571411, |
|
"logps/chosen": -488.4864196777344, |
|
"logps/rejected": -417.60400390625, |
|
"loss": 0.5171, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4484861493110657, |
|
"rewards/margins": 0.4763699173927307, |
|
"rewards/rejected": -0.02788383699953556, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4533715925394547e-07, |
|
"logits/chosen": -3.739753007888794, |
|
"logits/rejected": -3.9605700969696045, |
|
"logps/chosen": -507.19659423828125, |
|
"logps/rejected": -371.3736572265625, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5019505023956299, |
|
"rewards/margins": 0.4500049650669098, |
|
"rewards/rejected": 0.05194549635052681, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4390243902439023e-07, |
|
"logits/chosen": -4.046762466430664, |
|
"logits/rejected": -4.0279541015625, |
|
"logps/chosen": -556.5122680664062, |
|
"logps/rejected": -335.6501770019531, |
|
"loss": 0.5436, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.6151873469352722, |
|
"rewards/margins": 0.7387471795082092, |
|
"rewards/rejected": -0.12355981022119522, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 1.4246771879483498e-07, |
|
"logits/chosen": -3.7312331199645996, |
|
"logits/rejected": -3.662278413772583, |
|
"logps/chosen": -459.88525390625, |
|
"logps/rejected": -383.14984130859375, |
|
"loss": 0.582, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3872886300086975, |
|
"rewards/margins": 0.42437830567359924, |
|
"rewards/rejected": -0.03708968311548233, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.4103299856527975e-07, |
|
"logits/chosen": -4.147296905517578, |
|
"logits/rejected": -4.07787561416626, |
|
"logps/chosen": -554.8704833984375, |
|
"logps/rejected": -398.159912109375, |
|
"loss": 0.6256, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5256353616714478, |
|
"rewards/margins": 0.47513723373413086, |
|
"rewards/rejected": 0.05049814656376839, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.395982783357245e-07, |
|
"logits/chosen": -3.914976119995117, |
|
"logits/rejected": -3.905255079269409, |
|
"logps/chosen": -563.65576171875, |
|
"logps/rejected": -461.05389404296875, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.48630857467651367, |
|
"rewards/margins": 0.4222942888736725, |
|
"rewards/rejected": 0.0640142410993576, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -3.970454692840576, |
|
"eval_logits/rejected": -3.993534803390503, |
|
"eval_logps/chosen": -544.4996948242188, |
|
"eval_logps/rejected": -438.1958312988281, |
|
"eval_loss": 0.5662667155265808, |
|
"eval_rewards/accuracies": 0.6840000152587891, |
|
"eval_rewards/chosen": 0.4876936674118042, |
|
"eval_rewards/margins": 0.5067842602729797, |
|
"eval_rewards/rejected": -0.019090561196208, |
|
"eval_runtime": 146.1468, |
|
"eval_samples_per_second": 13.685, |
|
"eval_steps_per_second": 1.711, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3816355810616928e-07, |
|
"logits/chosen": -3.885633945465088, |
|
"logits/rejected": -3.992154598236084, |
|
"logps/chosen": -606.8893432617188, |
|
"logps/rejected": -488.0694885253906, |
|
"loss": 0.6481, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.506466269493103, |
|
"rewards/margins": 0.38042640686035156, |
|
"rewards/rejected": 0.12603983283042908, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.3672883787661404e-07, |
|
"logits/chosen": -3.9882044792175293, |
|
"logits/rejected": -4.012315273284912, |
|
"logps/chosen": -598.3148193359375, |
|
"logps/rejected": -423.91839599609375, |
|
"loss": 0.5414, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4659012258052826, |
|
"rewards/margins": 0.621524453163147, |
|
"rewards/rejected": -0.15562327206134796, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.352941176470588e-07, |
|
"logits/chosen": -4.175354957580566, |
|
"logits/rejected": -4.1613287925720215, |
|
"logps/chosen": -553.1173706054688, |
|
"logps/rejected": -457.8443298339844, |
|
"loss": 0.5357, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.49676817655563354, |
|
"rewards/margins": 0.5213097333908081, |
|
"rewards/rejected": -0.024541499093174934, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3385939741750356e-07, |
|
"logits/chosen": -4.05168342590332, |
|
"logits/rejected": -4.1532673835754395, |
|
"logps/chosen": -515.8355712890625, |
|
"logps/rejected": -411.39788818359375, |
|
"loss": 0.5648, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4736366271972656, |
|
"rewards/margins": 0.5187323689460754, |
|
"rewards/rejected": -0.045095693320035934, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.3242467718794834e-07, |
|
"logits/chosen": -4.0590500831604, |
|
"logits/rejected": -4.028027534484863, |
|
"logps/chosen": -478.349853515625, |
|
"logps/rejected": -426.6962890625, |
|
"loss": 0.5647, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4894079566001892, |
|
"rewards/margins": 0.4928358197212219, |
|
"rewards/rejected": -0.0034278512466698885, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 1.309899569583931e-07, |
|
"logits/chosen": -3.9538333415985107, |
|
"logits/rejected": -4.088204383850098, |
|
"logps/chosen": -615.9334106445312, |
|
"logps/rejected": -433.25189208984375, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5048609972000122, |
|
"rewards/margins": 0.6196298599243164, |
|
"rewards/rejected": -0.11476895958185196, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2955523672883787e-07, |
|
"logits/chosen": -3.891871690750122, |
|
"logits/rejected": -3.8232593536376953, |
|
"logps/chosen": -516.1154174804688, |
|
"logps/rejected": -465.57977294921875, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6237145662307739, |
|
"rewards/margins": 0.559754490852356, |
|
"rewards/rejected": 0.06396011263132095, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2812051649928262e-07, |
|
"logits/chosen": -4.138208866119385, |
|
"logits/rejected": -4.1710052490234375, |
|
"logps/chosen": -514.2755737304688, |
|
"logps/rejected": -385.1489562988281, |
|
"loss": 0.6169, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.442038357257843, |
|
"rewards/margins": 0.44703513383865356, |
|
"rewards/rejected": -0.004996694624423981, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.266857962697274e-07, |
|
"logits/chosen": -4.293547630310059, |
|
"logits/rejected": -4.3495774269104, |
|
"logps/chosen": -570.8736572265625, |
|
"logps/rejected": -520.841796875, |
|
"loss": 0.6195, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.49560147523880005, |
|
"rewards/margins": 0.48620933294296265, |
|
"rewards/rejected": 0.009392100386321545, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.2525107604017215e-07, |
|
"logits/chosen": -4.126075267791748, |
|
"logits/rejected": -3.8955910205841064, |
|
"logps/chosen": -650.3043212890625, |
|
"logps/rejected": -456.94110107421875, |
|
"loss": 0.5347, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6632064580917358, |
|
"rewards/margins": 0.6959556341171265, |
|
"rewards/rejected": -0.032749250531196594, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -3.9776611328125, |
|
"eval_logits/rejected": -4.001935958862305, |
|
"eval_logps/chosen": -544.619873046875, |
|
"eval_logps/rejected": -438.3401184082031, |
|
"eval_loss": 0.564439594745636, |
|
"eval_rewards/accuracies": 0.6850000023841858, |
|
"eval_rewards/chosen": 0.4756743311882019, |
|
"eval_rewards/margins": 0.5091925859451294, |
|
"eval_rewards/rejected": -0.03351828455924988, |
|
"eval_runtime": 145.9011, |
|
"eval_samples_per_second": 13.708, |
|
"eval_steps_per_second": 1.713, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2381635581061693e-07, |
|
"logits/chosen": -4.2549543380737305, |
|
"logits/rejected": -4.473557472229004, |
|
"logps/chosen": -614.374267578125, |
|
"logps/rejected": -472.72259521484375, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.49129754304885864, |
|
"rewards/margins": 0.42049235105514526, |
|
"rewards/rejected": 0.07080519199371338, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2238163558106168e-07, |
|
"logits/chosen": -4.080137729644775, |
|
"logits/rejected": -4.035037040710449, |
|
"logps/chosen": -531.7971801757812, |
|
"logps/rejected": -427.37860107421875, |
|
"loss": 0.6113, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.28571632504463196, |
|
"rewards/margins": 0.4164826273918152, |
|
"rewards/rejected": -0.13076625764369965, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.2094691535150646e-07, |
|
"logits/chosen": -3.9666385650634766, |
|
"logits/rejected": -4.024598121643066, |
|
"logps/chosen": -485.36181640625, |
|
"logps/rejected": -329.271728515625, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.49098771810531616, |
|
"rewards/margins": 0.6027761697769165, |
|
"rewards/rejected": -0.11178841441869736, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.195121951219512e-07, |
|
"logits/chosen": -4.0698957443237305, |
|
"logits/rejected": -4.125982761383057, |
|
"logps/chosen": -544.8397827148438, |
|
"logps/rejected": -468.3692932128906, |
|
"loss": 0.5681, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5031709671020508, |
|
"rewards/margins": 0.5633045434951782, |
|
"rewards/rejected": -0.06013358756899834, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1807747489239597e-07, |
|
"logits/chosen": -4.321501731872559, |
|
"logits/rejected": -4.136828899383545, |
|
"logps/chosen": -502.498779296875, |
|
"logps/rejected": -387.68841552734375, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.33200693130493164, |
|
"rewards/margins": 0.5042457580566406, |
|
"rewards/rejected": -0.17223885655403137, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.1664275466284074e-07, |
|
"logits/chosen": -4.194310188293457, |
|
"logits/rejected": -4.310281753540039, |
|
"logps/chosen": -580.3814086914062, |
|
"logps/rejected": -446.2391662597656, |
|
"loss": 0.5559, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.44598498940467834, |
|
"rewards/margins": 0.5190831422805786, |
|
"rewards/rejected": -0.07309817522764206, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 1.152080344332855e-07, |
|
"logits/chosen": -4.0883283615112305, |
|
"logits/rejected": -4.150923728942871, |
|
"logps/chosen": -554.2647094726562, |
|
"logps/rejected": -424.0943298339844, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4851420521736145, |
|
"rewards/margins": 0.5894008874893188, |
|
"rewards/rejected": -0.10425883531570435, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1377331420373027e-07, |
|
"logits/chosen": -4.141337871551514, |
|
"logits/rejected": -4.110450267791748, |
|
"logps/chosen": -580.8941040039062, |
|
"logps/rejected": -452.0682067871094, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5248143076896667, |
|
"rewards/margins": 0.5298766493797302, |
|
"rewards/rejected": -0.005062357988208532, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.1233859397417503e-07, |
|
"logits/chosen": -4.203800201416016, |
|
"logits/rejected": -4.3287224769592285, |
|
"logps/chosen": -611.932373046875, |
|
"logps/rejected": -462.0245056152344, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.6605905890464783, |
|
"rewards/margins": 0.6125748753547668, |
|
"rewards/rejected": 0.04801566153764725, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.109038737446198e-07, |
|
"logits/chosen": -3.9451992511749268, |
|
"logits/rejected": -3.978661060333252, |
|
"logps/chosen": -548.4806518554688, |
|
"logps/rejected": -418.55255126953125, |
|
"loss": 0.5837, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.4787958264350891, |
|
"rewards/margins": 0.4884239733219147, |
|
"rewards/rejected": -0.009628054685890675, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -3.9741623401641846, |
|
"eval_logits/rejected": -3.997610092163086, |
|
"eval_logps/chosen": -544.5935668945312, |
|
"eval_logps/rejected": -438.3072509765625, |
|
"eval_loss": 0.563690721988678, |
|
"eval_rewards/accuracies": 0.6830000281333923, |
|
"eval_rewards/chosen": 0.47830715775489807, |
|
"eval_rewards/margins": 0.5085403323173523, |
|
"eval_rewards/rejected": -0.03023313544690609, |
|
"eval_runtime": 145.8972, |
|
"eval_samples_per_second": 13.708, |
|
"eval_steps_per_second": 1.714, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.0946915351506456e-07, |
|
"logits/chosen": -3.9184958934783936, |
|
"logits/rejected": -3.7819457054138184, |
|
"logps/chosen": -571.571533203125, |
|
"logps/rejected": -445.2757263183594, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.40955060720443726, |
|
"rewards/margins": 0.4587160050868988, |
|
"rewards/rejected": -0.049165401607751846, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0803443328550932e-07, |
|
"logits/chosen": -3.905442476272583, |
|
"logits/rejected": -3.8821640014648438, |
|
"logps/chosen": -460.50372314453125, |
|
"logps/rejected": -420.4170837402344, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.32348185777664185, |
|
"rewards/margins": 0.37588781118392944, |
|
"rewards/rejected": -0.05240591615438461, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0659971305595408e-07, |
|
"logits/chosen": -3.8710105419158936, |
|
"logits/rejected": -3.9780330657958984, |
|
"logps/chosen": -492.71075439453125, |
|
"logps/rejected": -370.08160400390625, |
|
"loss": 0.5272, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.4087301790714264, |
|
"rewards/margins": 0.49260735511779785, |
|
"rewards/rejected": -0.08387719094753265, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.0516499282639884e-07, |
|
"logits/chosen": -4.243491172790527, |
|
"logits/rejected": -4.153388977050781, |
|
"logps/chosen": -496.98260498046875, |
|
"logps/rejected": -367.54632568359375, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.44594526290893555, |
|
"rewards/margins": 0.5719509720802307, |
|
"rewards/rejected": -0.12600573897361755, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.037302725968436e-07, |
|
"logits/chosen": -3.9939746856689453, |
|
"logits/rejected": -4.004325866699219, |
|
"logps/chosen": -478.77056884765625, |
|
"logps/rejected": -464.50665283203125, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.35015568137168884, |
|
"rewards/margins": 0.37633177638053894, |
|
"rewards/rejected": -0.02617608569562435, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0229555236728837e-07, |
|
"logits/chosen": -4.253720283508301, |
|
"logits/rejected": -4.267764568328857, |
|
"logps/chosen": -557.8858642578125, |
|
"logps/rejected": -412.69818115234375, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4118489623069763, |
|
"rewards/margins": 0.5684719085693359, |
|
"rewards/rejected": -0.15662303566932678, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.0086083213773313e-07, |
|
"logits/chosen": -3.7371535301208496, |
|
"logits/rejected": -3.7913818359375, |
|
"logps/chosen": -562.0777587890625, |
|
"logps/rejected": -491.91143798828125, |
|
"loss": 0.5108, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5288134813308716, |
|
"rewards/margins": 0.5345078706741333, |
|
"rewards/rejected": -0.005694452673196793, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.94261119081779e-08, |
|
"logits/chosen": -3.9377448558807373, |
|
"logits/rejected": -3.955479383468628, |
|
"logps/chosen": -502.92718505859375, |
|
"logps/rejected": -408.81976318359375, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5690404772758484, |
|
"rewards/margins": 0.502609372138977, |
|
"rewards/rejected": 0.06643114238977432, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 9.799139167862266e-08, |
|
"logits/chosen": -4.40088415145874, |
|
"logits/rejected": -4.40977144241333, |
|
"logps/chosen": -582.387451171875, |
|
"logps/rejected": -508.1477966308594, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.5947442650794983, |
|
"rewards/margins": 0.6568835377693176, |
|
"rewards/rejected": -0.06213930994272232, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.655667144906743e-08, |
|
"logits/chosen": -4.194244861602783, |
|
"logits/rejected": -4.144165992736816, |
|
"logps/chosen": -582.037109375, |
|
"logps/rejected": -409.24224853515625, |
|
"loss": 0.5293, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5745357275009155, |
|
"rewards/margins": 0.728204071521759, |
|
"rewards/rejected": -0.15366844832897186, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -3.9778196811676025, |
|
"eval_logits/rejected": -4.002331733703613, |
|
"eval_logps/chosen": -544.66162109375, |
|
"eval_logps/rejected": -438.367919921875, |
|
"eval_loss": 0.5634328126907349, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": 0.47150418162345886, |
|
"eval_rewards/margins": 0.5078018307685852, |
|
"eval_rewards/rejected": -0.03629762679338455, |
|
"eval_runtime": 146.8004, |
|
"eval_samples_per_second": 13.624, |
|
"eval_steps_per_second": 1.703, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.512195121951219e-08, |
|
"logits/chosen": -4.067798614501953, |
|
"logits/rejected": -4.208149433135986, |
|
"logps/chosen": -511.45013427734375, |
|
"logps/rejected": -376.13067626953125, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4113733768463135, |
|
"rewards/margins": 0.48462361097335815, |
|
"rewards/rejected": -0.07325027137994766, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.368723098995696e-08, |
|
"logits/chosen": -4.2905426025390625, |
|
"logits/rejected": -4.173062801361084, |
|
"logps/chosen": -573.29296875, |
|
"logps/rejected": -427.76739501953125, |
|
"loss": 0.5641, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.48305606842041016, |
|
"rewards/margins": 0.5324376225471497, |
|
"rewards/rejected": -0.049381546676158905, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 9.225251076040172e-08, |
|
"logits/chosen": -3.8998687267303467, |
|
"logits/rejected": -3.798374891281128, |
|
"logps/chosen": -553.4527587890625, |
|
"logps/rejected": -378.1935119628906, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4461655020713806, |
|
"rewards/margins": 0.6628150343894958, |
|
"rewards/rejected": -0.21664953231811523, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.081779053084649e-08, |
|
"logits/chosen": -4.098966121673584, |
|
"logits/rejected": -4.07062292098999, |
|
"logps/chosen": -563.8943481445312, |
|
"logps/rejected": -444.6372985839844, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5742425918579102, |
|
"rewards/margins": 0.5752390027046204, |
|
"rewards/rejected": -0.0009963444899767637, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.938307030129125e-08, |
|
"logits/chosen": -4.253169059753418, |
|
"logits/rejected": -4.21138858795166, |
|
"logps/chosen": -535.7103271484375, |
|
"logps/rejected": -373.97003173828125, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.3600585460662842, |
|
"rewards/margins": 0.45460644364356995, |
|
"rewards/rejected": -0.09454789757728577, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.794835007173601e-08, |
|
"logits/chosen": -3.9807746410369873, |
|
"logits/rejected": -3.941415309906006, |
|
"logps/chosen": -572.0859985351562, |
|
"logps/rejected": -468.2994079589844, |
|
"loss": 0.58, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5067304968833923, |
|
"rewards/margins": 0.50401771068573, |
|
"rewards/rejected": 0.0027127789799124002, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 8.651362984218078e-08, |
|
"logits/chosen": -4.293785095214844, |
|
"logits/rejected": -4.3718976974487305, |
|
"logps/chosen": -584.475830078125, |
|
"logps/rejected": -409.7781677246094, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.3756251931190491, |
|
"rewards/margins": 0.5408438444137573, |
|
"rewards/rejected": -0.16521869599819183, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.507890961262554e-08, |
|
"logits/chosen": -3.9542198181152344, |
|
"logits/rejected": -3.9630751609802246, |
|
"logps/chosen": -513.8878173828125, |
|
"logps/rejected": -526.8805541992188, |
|
"loss": 0.5605, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5460586547851562, |
|
"rewards/margins": 0.4736880362033844, |
|
"rewards/rejected": 0.07237061113119125, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.364418938307031e-08, |
|
"logits/chosen": -3.925539493560791, |
|
"logits/rejected": -3.744020462036133, |
|
"logps/chosen": -536.7335205078125, |
|
"logps/rejected": -374.2388610839844, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4619140028953552, |
|
"rewards/margins": 0.5450933575630188, |
|
"rewards/rejected": -0.08317933976650238, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.220946915351506e-08, |
|
"logits/chosen": -3.950735569000244, |
|
"logits/rejected": -3.9631354808807373, |
|
"logps/chosen": -522.4519653320312, |
|
"logps/rejected": -515.7288208007812, |
|
"loss": 0.5128, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4143013060092926, |
|
"rewards/margins": 0.5980950593948364, |
|
"rewards/rejected": -0.1837938129901886, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -3.9803950786590576, |
|
"eval_logits/rejected": -4.005295753479004, |
|
"eval_logps/chosen": -544.6318969726562, |
|
"eval_logps/rejected": -438.39166259765625, |
|
"eval_loss": 0.5619609355926514, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": 0.474471777677536, |
|
"eval_rewards/margins": 0.5131421089172363, |
|
"eval_rewards/rejected": -0.038670338690280914, |
|
"eval_runtime": 147.8686, |
|
"eval_samples_per_second": 13.526, |
|
"eval_steps_per_second": 1.691, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 8.077474892395982e-08, |
|
"logits/chosen": -4.0901780128479, |
|
"logits/rejected": -4.084465980529785, |
|
"logps/chosen": -538.7194213867188, |
|
"logps/rejected": -432.45892333984375, |
|
"loss": 0.5549, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.45796340703964233, |
|
"rewards/margins": 0.44825053215026855, |
|
"rewards/rejected": 0.009712914004921913, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.934002869440459e-08, |
|
"logits/chosen": -3.90226411819458, |
|
"logits/rejected": -4.059938907623291, |
|
"logps/chosen": -635.9435424804688, |
|
"logps/rejected": -362.847412109375, |
|
"loss": 0.5811, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3464578092098236, |
|
"rewards/margins": 0.35814762115478516, |
|
"rewards/rejected": -0.011689816601574421, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.790530846484935e-08, |
|
"logits/chosen": -4.0388689041137695, |
|
"logits/rejected": -3.9921679496765137, |
|
"logps/chosen": -555.1847534179688, |
|
"logps/rejected": -400.4381408691406, |
|
"loss": 0.5537, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.46570801734924316, |
|
"rewards/margins": 0.5739080309867859, |
|
"rewards/rejected": -0.1082000583410263, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.647058823529412e-08, |
|
"logits/chosen": -4.057839870452881, |
|
"logits/rejected": -3.962494373321533, |
|
"logps/chosen": -633.3861083984375, |
|
"logps/rejected": -521.1183471679688, |
|
"loss": 0.569, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.44552069902420044, |
|
"rewards/margins": 0.42656344175338745, |
|
"rewards/rejected": 0.01895725727081299, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 7.503586800573888e-08, |
|
"logits/chosen": -4.196125507354736, |
|
"logits/rejected": -4.071255683898926, |
|
"logps/chosen": -488.28460693359375, |
|
"logps/rejected": -371.49169921875, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.39670076966285706, |
|
"rewards/margins": 0.3868991434574127, |
|
"rewards/rejected": 0.009801648557186127, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.360114777618365e-08, |
|
"logits/chosen": -3.972092390060425, |
|
"logits/rejected": -4.106286525726318, |
|
"logps/chosen": -572.9547119140625, |
|
"logps/rejected": -420.35400390625, |
|
"loss": 0.5315, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.49660277366638184, |
|
"rewards/margins": 0.5567241311073303, |
|
"rewards/rejected": -0.06012127920985222, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.21664275466284e-08, |
|
"logits/chosen": -3.9770302772521973, |
|
"logits/rejected": -4.03969669342041, |
|
"logps/chosen": -547.3425903320312, |
|
"logps/rejected": -495.91851806640625, |
|
"loss": 0.604, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.48121723532676697, |
|
"rewards/margins": 0.4144704341888428, |
|
"rewards/rejected": 0.06674680858850479, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 7.073170731707316e-08, |
|
"logits/chosen": -4.059111595153809, |
|
"logits/rejected": -4.129426002502441, |
|
"logps/chosen": -564.0946655273438, |
|
"logps/rejected": -506.58709716796875, |
|
"loss": 0.549, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.40859222412109375, |
|
"rewards/margins": 0.3801480233669281, |
|
"rewards/rejected": 0.028444204479455948, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.929698708751793e-08, |
|
"logits/chosen": -4.225644111633301, |
|
"logits/rejected": -4.292551040649414, |
|
"logps/chosen": -588.9600830078125, |
|
"logps/rejected": -472.1261291503906, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4812944531440735, |
|
"rewards/margins": 0.5918601155281067, |
|
"rewards/rejected": -0.11056558787822723, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.786226685796269e-08, |
|
"logits/chosen": -4.155394077301025, |
|
"logits/rejected": -4.14565896987915, |
|
"logps/chosen": -525.122314453125, |
|
"logps/rejected": -392.67376708984375, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.31629544496536255, |
|
"rewards/margins": 0.37368619441986084, |
|
"rewards/rejected": -0.0573907308280468, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -3.9814672470092773, |
|
"eval_logits/rejected": -4.006735324859619, |
|
"eval_logps/chosen": -544.69775390625, |
|
"eval_logps/rejected": -438.4468688964844, |
|
"eval_loss": 0.5624998211860657, |
|
"eval_rewards/accuracies": 0.6859999895095825, |
|
"eval_rewards/chosen": 0.46788930892944336, |
|
"eval_rewards/margins": 0.5120863914489746, |
|
"eval_rewards/rejected": -0.044197000563144684, |
|
"eval_runtime": 147.1881, |
|
"eval_samples_per_second": 13.588, |
|
"eval_steps_per_second": 1.699, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.642754662840746e-08, |
|
"logits/chosen": -4.196806907653809, |
|
"logits/rejected": -4.363795280456543, |
|
"logps/chosen": -574.8983154296875, |
|
"logps/rejected": -502.860107421875, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.49528592824935913, |
|
"rewards/margins": 0.5257723927497864, |
|
"rewards/rejected": -0.03048643469810486, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 6.499282639885222e-08, |
|
"logits/chosen": -3.957125186920166, |
|
"logits/rejected": -3.888190507888794, |
|
"logps/chosen": -547.2122192382812, |
|
"logps/rejected": -404.66595458984375, |
|
"loss": 0.5457, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4918132722377777, |
|
"rewards/margins": 0.5843526124954224, |
|
"rewards/rejected": -0.09253935515880585, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.355810616929698e-08, |
|
"logits/chosen": -3.979330539703369, |
|
"logits/rejected": -4.06231164932251, |
|
"logps/chosen": -524.4691772460938, |
|
"logps/rejected": -422.9524841308594, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.4810391962528229, |
|
"rewards/margins": 0.5485955476760864, |
|
"rewards/rejected": -0.06755636632442474, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.212338593974175e-08, |
|
"logits/chosen": -4.29758882522583, |
|
"logits/rejected": -4.182176113128662, |
|
"logps/chosen": -595.9281616210938, |
|
"logps/rejected": -425.3091735839844, |
|
"loss": 0.5189, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5549203157424927, |
|
"rewards/margins": 0.5970374941825867, |
|
"rewards/rejected": -0.04211718589067459, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.068866571018651e-08, |
|
"logits/chosen": -4.2188720703125, |
|
"logits/rejected": -4.125060081481934, |
|
"logps/chosen": -521.8778076171875, |
|
"logps/rejected": -426.075439453125, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.3750000596046448, |
|
"rewards/margins": 0.34474819898605347, |
|
"rewards/rejected": 0.03025185689330101, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.925394548063128e-08, |
|
"logits/chosen": -4.250518321990967, |
|
"logits/rejected": -4.323083400726318, |
|
"logps/chosen": -596.1790161132812, |
|
"logps/rejected": -468.62078857421875, |
|
"loss": 0.628, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.49697431921958923, |
|
"rewards/margins": 0.5001575350761414, |
|
"rewards/rejected": -0.00318324426189065, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.7819225251076036e-08, |
|
"logits/chosen": -4.112654685974121, |
|
"logits/rejected": -3.997405529022217, |
|
"logps/chosen": -498.8377990722656, |
|
"logps/rejected": -473.70550537109375, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3584319055080414, |
|
"rewards/margins": 0.3437018394470215, |
|
"rewards/rejected": 0.014730053022503853, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.63845050215208e-08, |
|
"logits/chosen": -4.2403950691223145, |
|
"logits/rejected": -4.099762916564941, |
|
"logps/chosen": -572.9876098632812, |
|
"logps/rejected": -437.2333984375, |
|
"loss": 0.565, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.43449336290359497, |
|
"rewards/margins": 0.5010371804237366, |
|
"rewards/rejected": -0.06654379516839981, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.4949784791965565e-08, |
|
"logits/chosen": -4.034601211547852, |
|
"logits/rejected": -4.012211322784424, |
|
"logps/chosen": -535.9708862304688, |
|
"logps/rejected": -390.2044372558594, |
|
"loss": 0.5451, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5869877338409424, |
|
"rewards/margins": 0.674468994140625, |
|
"rewards/rejected": -0.087481290102005, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.351506456241032e-08, |
|
"logits/chosen": -4.010631561279297, |
|
"logits/rejected": -3.996525526046753, |
|
"logps/chosen": -550.5261840820312, |
|
"logps/rejected": -470.1412658691406, |
|
"loss": 0.5469, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5485318899154663, |
|
"rewards/margins": 0.49418431520462036, |
|
"rewards/rejected": 0.054347604513168335, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_logits/chosen": -3.98427677154541, |
|
"eval_logits/rejected": -4.009834289550781, |
|
"eval_logps/chosen": -544.7650756835938, |
|
"eval_logps/rejected": -438.4956359863281, |
|
"eval_loss": 0.5618208050727844, |
|
"eval_rewards/accuracies": 0.6859999895095825, |
|
"eval_rewards/chosen": 0.4611594080924988, |
|
"eval_rewards/margins": 0.5102306604385376, |
|
"eval_rewards/rejected": -0.049071334302425385, |
|
"eval_runtime": 146.3352, |
|
"eval_samples_per_second": 13.667, |
|
"eval_steps_per_second": 1.708, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.208034433285509e-08, |
|
"logits/chosen": -3.8921310901641846, |
|
"logits/rejected": -3.848719358444214, |
|
"logps/chosen": -647.0032958984375, |
|
"logps/rejected": -568.3533935546875, |
|
"loss": 0.5397, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5782560110092163, |
|
"rewards/margins": 0.5668593645095825, |
|
"rewards/rejected": 0.011396640911698341, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 5.064562410329985e-08, |
|
"logits/chosen": -4.195284843444824, |
|
"logits/rejected": -4.076591491699219, |
|
"logps/chosen": -530.7032470703125, |
|
"logps/rejected": -445.4291076660156, |
|
"loss": 0.5744, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.45487624406814575, |
|
"rewards/margins": 0.4947517514228821, |
|
"rewards/rejected": -0.03987548500299454, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.9210903873744616e-08, |
|
"logits/chosen": -4.124705791473389, |
|
"logits/rejected": -4.10734748840332, |
|
"logps/chosen": -541.8250122070312, |
|
"logps/rejected": -430.775146484375, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.4992215633392334, |
|
"rewards/margins": 0.41505131125450134, |
|
"rewards/rejected": 0.08417025953531265, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.777618364418938e-08, |
|
"logits/chosen": -3.803776502609253, |
|
"logits/rejected": -3.8794872760772705, |
|
"logps/chosen": -551.4606323242188, |
|
"logps/rejected": -393.61907958984375, |
|
"loss": 0.5847, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.3885660171508789, |
|
"rewards/margins": 0.4686763882637024, |
|
"rewards/rejected": -0.08011035621166229, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.6341463414634145e-08, |
|
"logits/chosen": -4.222638130187988, |
|
"logits/rejected": -4.128180027008057, |
|
"logps/chosen": -616.28515625, |
|
"logps/rejected": -464.65692138671875, |
|
"loss": 0.6212, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5344708561897278, |
|
"rewards/margins": 0.4752708971500397, |
|
"rewards/rejected": 0.059200018644332886, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.490674318507891e-08, |
|
"logits/chosen": -4.027795314788818, |
|
"logits/rejected": -3.9686641693115234, |
|
"logps/chosen": -548.213134765625, |
|
"logps/rejected": -391.7416076660156, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.4162333607673645, |
|
"rewards/margins": 0.338506817817688, |
|
"rewards/rejected": 0.07772652804851532, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.3472022955523674e-08, |
|
"logits/chosen": -4.117037773132324, |
|
"logits/rejected": -4.153426647186279, |
|
"logps/chosen": -632.22607421875, |
|
"logps/rejected": -593.6041870117188, |
|
"loss": 0.627, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.40994158387184143, |
|
"rewards/margins": 0.3304620385169983, |
|
"rewards/rejected": 0.07947959750890732, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.203730272596843e-08, |
|
"logits/chosen": -3.860865831375122, |
|
"logits/rejected": -3.7739486694335938, |
|
"logps/chosen": -516.7479248046875, |
|
"logps/rejected": -392.3046569824219, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.31629177927970886, |
|
"rewards/margins": 0.39345088601112366, |
|
"rewards/rejected": -0.07715904712677002, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.0602582496413197e-08, |
|
"logits/chosen": -3.8026537895202637, |
|
"logits/rejected": -3.838305711746216, |
|
"logps/chosen": -599.1148681640625, |
|
"logps/rejected": -478.2513122558594, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.4942142963409424, |
|
"rewards/margins": 0.4958348274230957, |
|
"rewards/rejected": -0.001620540046133101, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.916786226685796e-08, |
|
"logits/chosen": -3.803657054901123, |
|
"logits/rejected": -3.809593677520752, |
|
"logps/chosen": -450.1053161621094, |
|
"logps/rejected": -387.50457763671875, |
|
"loss": 0.5807, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.4047510623931885, |
|
"rewards/margins": 0.3800794184207916, |
|
"rewards/rejected": 0.0246716421097517, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_logits/chosen": -3.9818077087402344, |
|
"eval_logits/rejected": -4.006768703460693, |
|
"eval_logps/chosen": -544.7014770507812, |
|
"eval_logps/rejected": -438.45843505859375, |
|
"eval_loss": 0.561528742313385, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": 0.46752142906188965, |
|
"eval_rewards/margins": 0.5128761529922485, |
|
"eval_rewards/rejected": -0.04535466805100441, |
|
"eval_runtime": 146.6531, |
|
"eval_samples_per_second": 13.638, |
|
"eval_steps_per_second": 1.705, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.7733142037302726e-08, |
|
"logits/chosen": -4.237338066101074, |
|
"logits/rejected": -4.075575828552246, |
|
"logps/chosen": -507.56475830078125, |
|
"logps/rejected": -460.54864501953125, |
|
"loss": 0.5626, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3922446668148041, |
|
"rewards/margins": 0.3932397663593292, |
|
"rewards/rejected": -0.000995102571323514, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 3.629842180774749e-08, |
|
"logits/chosen": -4.196072578430176, |
|
"logits/rejected": -4.2456374168396, |
|
"logps/chosen": -561.850830078125, |
|
"logps/rejected": -485.9574279785156, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.45551833510398865, |
|
"rewards/margins": 0.5499417185783386, |
|
"rewards/rejected": -0.09442339837551117, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.4863701578192255e-08, |
|
"logits/chosen": -4.031411170959473, |
|
"logits/rejected": -4.024487495422363, |
|
"logps/chosen": -486.8564453125, |
|
"logps/rejected": -412.80712890625, |
|
"loss": 0.5609, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.395702064037323, |
|
"rewards/margins": 0.37622708082199097, |
|
"rewards/rejected": 0.01947496458888054, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.342898134863702e-08, |
|
"logits/chosen": -4.104000091552734, |
|
"logits/rejected": -4.140475273132324, |
|
"logps/chosen": -629.5847778320312, |
|
"logps/rejected": -438.322021484375, |
|
"loss": 0.5127, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6019877195358276, |
|
"rewards/margins": 0.7443768382072449, |
|
"rewards/rejected": -0.1423892229795456, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.1994261119081784e-08, |
|
"logits/chosen": -4.0185065269470215, |
|
"logits/rejected": -3.976111888885498, |
|
"logps/chosen": -567.458740234375, |
|
"logps/rejected": -433.2574157714844, |
|
"loss": 0.5677, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.40292826294898987, |
|
"rewards/margins": 0.45192545652389526, |
|
"rewards/rejected": -0.048997145146131516, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.055954088952654e-08, |
|
"logits/chosen": -3.958566665649414, |
|
"logits/rejected": -4.042834758758545, |
|
"logps/chosen": -507.0655212402344, |
|
"logps/rejected": -423.1856384277344, |
|
"loss": 0.5869, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.4078023433685303, |
|
"rewards/margins": 0.4608895778656006, |
|
"rewards/rejected": -0.05308721214532852, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.9124820659971306e-08, |
|
"logits/chosen": -4.479077339172363, |
|
"logits/rejected": -4.497659683227539, |
|
"logps/chosen": -593.07666015625, |
|
"logps/rejected": -471.2254943847656, |
|
"loss": 0.5922, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5073246359825134, |
|
"rewards/margins": 0.5663779973983765, |
|
"rewards/rejected": -0.059053339064121246, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.7690100430416067e-08, |
|
"logits/chosen": -4.271915912628174, |
|
"logits/rejected": -4.254021644592285, |
|
"logps/chosen": -458.9129943847656, |
|
"logps/rejected": -393.89862060546875, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.3906114995479584, |
|
"rewards/margins": 0.6087072491645813, |
|
"rewards/rejected": -0.2180958241224289, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.625538020086083e-08, |
|
"logits/chosen": -4.178504943847656, |
|
"logits/rejected": -4.220212459564209, |
|
"logps/chosen": -584.3404541015625, |
|
"logps/rejected": -444.0389709472656, |
|
"loss": 0.5676, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4277767241001129, |
|
"rewards/margins": 0.5100394487380981, |
|
"rewards/rejected": -0.08226276189088821, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.4820659971305596e-08, |
|
"logits/chosen": -4.02055549621582, |
|
"logits/rejected": -4.129875183105469, |
|
"logps/chosen": -557.54345703125, |
|
"logps/rejected": -435.833984375, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.48887911438941956, |
|
"rewards/margins": 0.5414212942123413, |
|
"rewards/rejected": -0.05254218727350235, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -3.9832568168640137, |
|
"eval_logits/rejected": -4.008208274841309, |
|
"eval_logps/chosen": -544.701904296875, |
|
"eval_logps/rejected": -438.4403381347656, |
|
"eval_loss": 0.5619760751724243, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": 0.46747326850891113, |
|
"eval_rewards/margins": 0.5110137462615967, |
|
"eval_rewards/rejected": -0.04354046657681465, |
|
"eval_runtime": 147.499, |
|
"eval_samples_per_second": 13.559, |
|
"eval_steps_per_second": 1.695, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3385939741750357e-08, |
|
"logits/chosen": -4.32746696472168, |
|
"logits/rejected": -4.259668827056885, |
|
"logps/chosen": -622.6856079101562, |
|
"logps/rejected": -556.1048583984375, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.5587274432182312, |
|
"rewards/margins": 0.6079638600349426, |
|
"rewards/rejected": -0.049236398190259933, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.195121951219512e-08, |
|
"logits/chosen": -3.798098087310791, |
|
"logits/rejected": -4.027928829193115, |
|
"logps/chosen": -534.12646484375, |
|
"logps/rejected": -370.48907470703125, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5056679844856262, |
|
"rewards/margins": 0.6213759183883667, |
|
"rewards/rejected": -0.11570799350738525, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.0516499282639883e-08, |
|
"logits/chosen": -4.053610801696777, |
|
"logits/rejected": -4.082070350646973, |
|
"logps/chosen": -639.72998046875, |
|
"logps/rejected": -456.34844970703125, |
|
"loss": 0.5726, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5697764158248901, |
|
"rewards/margins": 0.5703621506690979, |
|
"rewards/rejected": -0.0005857095238752663, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.9081779053084647e-08, |
|
"logits/chosen": -3.877751111984253, |
|
"logits/rejected": -3.9127018451690674, |
|
"logps/chosen": -583.1852416992188, |
|
"logps/rejected": -401.8482971191406, |
|
"loss": 0.5216, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5018226504325867, |
|
"rewards/margins": 0.5718634724617004, |
|
"rewards/rejected": -0.07004072517156601, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.7647058823529412e-08, |
|
"logits/chosen": -4.168404579162598, |
|
"logits/rejected": -4.074638366699219, |
|
"logps/chosen": -510.4005432128906, |
|
"logps/rejected": -415.7406311035156, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4106532037258148, |
|
"rewards/margins": 0.4389967918395996, |
|
"rewards/rejected": -0.028343593701720238, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.6212338593974173e-08, |
|
"logits/chosen": -4.087225914001465, |
|
"logits/rejected": -3.9666972160339355, |
|
"logps/chosen": -465.10986328125, |
|
"logps/rejected": -324.4447021484375, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3475942015647888, |
|
"rewards/margins": 0.424374520778656, |
|
"rewards/rejected": -0.07678033411502838, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.4777618364418938e-08, |
|
"logits/chosen": -4.230082035064697, |
|
"logits/rejected": -4.243712902069092, |
|
"logps/chosen": -635.1439208984375, |
|
"logps/rejected": -543.9008178710938, |
|
"loss": 0.5959, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6082009077072144, |
|
"rewards/margins": 0.42760133743286133, |
|
"rewards/rejected": 0.1805996149778366, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.3342898134863702e-08, |
|
"logits/chosen": -4.096220970153809, |
|
"logits/rejected": -4.115006446838379, |
|
"logps/chosen": -577.7149658203125, |
|
"logps/rejected": -440.178955078125, |
|
"loss": 0.6036, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.4464997351169586, |
|
"rewards/margins": 0.4255821108818054, |
|
"rewards/rejected": 0.020917650312185287, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.1908177905308463e-08, |
|
"logits/chosen": -3.8991074562072754, |
|
"logits/rejected": -3.802464246749878, |
|
"logps/chosen": -519.2384643554688, |
|
"logps/rejected": -435.0231018066406, |
|
"loss": 0.5509, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.44993266463279724, |
|
"rewards/margins": 0.5668569207191467, |
|
"rewards/rejected": -0.1169242262840271, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.0473457675753228e-08, |
|
"logits/chosen": -4.110097885131836, |
|
"logits/rejected": -4.070154190063477, |
|
"logps/chosen": -539.1266479492188, |
|
"logps/rejected": -404.272216796875, |
|
"loss": 0.5484, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5660614371299744, |
|
"rewards/margins": 0.6612092852592468, |
|
"rewards/rejected": -0.0951477512717247, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_logits/chosen": -3.9850714206695557, |
|
"eval_logits/rejected": -4.010331630706787, |
|
"eval_logps/chosen": -544.69189453125, |
|
"eval_logps/rejected": -438.4535827636719, |
|
"eval_loss": 0.5614883899688721, |
|
"eval_rewards/accuracies": 0.6930000185966492, |
|
"eval_rewards/chosen": 0.46847668290138245, |
|
"eval_rewards/margins": 0.5133422613143921, |
|
"eval_rewards/rejected": -0.044865623116493225, |
|
"eval_runtime": 146.7356, |
|
"eval_samples_per_second": 13.63, |
|
"eval_steps_per_second": 1.704, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 9.03873744619799e-09, |
|
"logits/chosen": -4.123238563537598, |
|
"logits/rejected": -4.03770112991333, |
|
"logps/chosen": -511.1904296875, |
|
"logps/rejected": -446.9549865722656, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.46543973684310913, |
|
"rewards/margins": 0.4664246439933777, |
|
"rewards/rejected": -0.0009849362540990114, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 7.604017216642753e-09, |
|
"logits/chosen": -4.272444725036621, |
|
"logits/rejected": -4.183244705200195, |
|
"logps/chosen": -505.7000427246094, |
|
"logps/rejected": -433.4576721191406, |
|
"loss": 0.5317, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5172845721244812, |
|
"rewards/margins": 0.5496169328689575, |
|
"rewards/rejected": -0.03233236074447632, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 6.169296987087518e-09, |
|
"logits/chosen": -4.149974346160889, |
|
"logits/rejected": -4.188473701477051, |
|
"logps/chosen": -587.9978637695312, |
|
"logps/rejected": -437.9640197753906, |
|
"loss": 0.557, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5558081269264221, |
|
"rewards/margins": 0.5474778413772583, |
|
"rewards/rejected": 0.008330265991389751, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.734576757532282e-09, |
|
"logits/chosen": -4.119086265563965, |
|
"logits/rejected": -4.145096778869629, |
|
"logps/chosen": -511.7611389160156, |
|
"logps/rejected": -394.4190368652344, |
|
"loss": 0.5667, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.42686066031455994, |
|
"rewards/margins": 0.49367666244506836, |
|
"rewards/rejected": -0.06681600958108902, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.299856527977044e-09, |
|
"logits/chosen": -3.9760982990264893, |
|
"logits/rejected": -4.040436267852783, |
|
"logps/chosen": -612.0133056640625, |
|
"logps/rejected": -519.7421875, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.49833711981773376, |
|
"rewards/margins": 0.5593429803848267, |
|
"rewards/rejected": -0.06100592762231827, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.8651362984218077e-09, |
|
"logits/chosen": -3.951416015625, |
|
"logits/rejected": -3.949162006378174, |
|
"logps/chosen": -557.078857421875, |
|
"logps/rejected": -389.9462890625, |
|
"loss": 0.5123, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5364678502082825, |
|
"rewards/margins": 0.6398257613182068, |
|
"rewards/rejected": -0.10335797071456909, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.30416068866571e-10, |
|
"logits/chosen": -4.126285076141357, |
|
"logits/rejected": -4.162901878356934, |
|
"logps/chosen": -532.77880859375, |
|
"logps/rejected": -499.14410400390625, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.4620552659034729, |
|
"rewards/margins": 0.32118576765060425, |
|
"rewards/rejected": 0.14086945354938507, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3873, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5913154047772216, |
|
"train_runtime": 14580.3501, |
|
"train_samples_per_second": 4.25, |
|
"train_steps_per_second": 0.266 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3873, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|