|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 3821, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.00026171159382360636, |
|
"grad_norm": 7.5399394035339355, |
|
"learning_rate": 1.3054830287206266e-09, |
|
"logits/chosen": -3.2296347618103027, |
|
"logits/rejected": -3.202975034713745, |
|
"logps/chosen": -402.0491638183594, |
|
"logps/rejected": -447.69073486328125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0026171159382360636, |
|
"grad_norm": 7.425467491149902, |
|
"learning_rate": 1.3054830287206264e-08, |
|
"logits/chosen": -3.1455202102661133, |
|
"logits/rejected": -3.127438545227051, |
|
"logps/chosen": -350.64984130859375, |
|
"logps/rejected": -302.1429443359375, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.0005491668125614524, |
|
"rewards/margins": -0.0004519576614256948, |
|
"rewards/rejected": -9.720920934341848e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.005234231876472127, |
|
"grad_norm": 8.282913208007812, |
|
"learning_rate": 2.610966057441253e-08, |
|
"logits/chosen": -3.162764310836792, |
|
"logits/rejected": -3.1438052654266357, |
|
"logps/chosen": -390.9164123535156, |
|
"logps/rejected": -291.6170654296875, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.00015077728312462568, |
|
"rewards/margins": -0.00010697855759644881, |
|
"rewards/rejected": -4.379871461424045e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.007851347814708191, |
|
"grad_norm": 7.730243682861328, |
|
"learning_rate": 3.91644908616188e-08, |
|
"logits/chosen": -3.145042657852173, |
|
"logits/rejected": -3.1386446952819824, |
|
"logps/chosen": -333.2342224121094, |
|
"logps/rejected": -318.4365234375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -9.905405022436753e-05, |
|
"rewards/margins": 8.009998418856412e-05, |
|
"rewards/rejected": -0.00017915402713697404, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.010468463752944255, |
|
"grad_norm": 6.743426322937012, |
|
"learning_rate": 5.221932114882506e-08, |
|
"logits/chosen": -3.1190810203552246, |
|
"logits/rejected": -3.1290841102600098, |
|
"logps/chosen": -278.45318603515625, |
|
"logps/rejected": -271.45623779296875, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00019631900067906827, |
|
"rewards/margins": 0.00044984457781538367, |
|
"rewards/rejected": -0.0002535254752729088, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01308557969118032, |
|
"grad_norm": 6.665031433105469, |
|
"learning_rate": 6.527415143603133e-08, |
|
"logits/chosen": -3.2044689655303955, |
|
"logits/rejected": -3.1922316551208496, |
|
"logps/chosen": -344.5279235839844, |
|
"logps/rejected": -289.36700439453125, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.41874998807907104, |
|
"rewards/chosen": -0.0005578735726885498, |
|
"rewards/margins": -0.00022401110618375242, |
|
"rewards/rejected": -0.0003338624082971364, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.015702695629416383, |
|
"grad_norm": 7.2192559242248535, |
|
"learning_rate": 7.83289817232376e-08, |
|
"logits/chosen": -3.1214582920074463, |
|
"logits/rejected": -3.118607759475708, |
|
"logps/chosen": -327.0560607910156, |
|
"logps/rejected": -280.2232971191406, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.00027051212964579463, |
|
"rewards/margins": -2.3294798666029237e-05, |
|
"rewards/rejected": -0.0002472173946443945, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.018319811567652448, |
|
"grad_norm": 7.3564372062683105, |
|
"learning_rate": 9.138381201044386e-08, |
|
"logits/chosen": -3.1655259132385254, |
|
"logits/rejected": -3.1472818851470947, |
|
"logps/chosen": -345.159912109375, |
|
"logps/rejected": -303.17254638671875, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00026873586466535926, |
|
"rewards/margins": -0.0002816948399413377, |
|
"rewards/rejected": 1.2959059858985711e-05, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02093692750588851, |
|
"grad_norm": 8.323124885559082, |
|
"learning_rate": 1.0443864229765012e-07, |
|
"logits/chosen": -3.0945706367492676, |
|
"logits/rejected": -3.090824842453003, |
|
"logps/chosen": -339.45819091796875, |
|
"logps/rejected": -308.2747497558594, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.0003364879812579602, |
|
"rewards/margins": 5.1764400268439204e-05, |
|
"rewards/rejected": -0.0003882523742504418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.023554043444124574, |
|
"grad_norm": 7.6322407722473145, |
|
"learning_rate": 1.174934725848564e-07, |
|
"logits/chosen": -3.1316659450531006, |
|
"logits/rejected": -3.1392831802368164, |
|
"logps/chosen": -320.486572265625, |
|
"logps/rejected": -294.0498352050781, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0007914910092949867, |
|
"rewards/margins": 0.0005054398206993937, |
|
"rewards/rejected": -0.0012969308299943805, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"grad_norm": 7.107323169708252, |
|
"learning_rate": 1.3054830287206266e-07, |
|
"logits/chosen": -3.1230645179748535, |
|
"logits/rejected": -3.142228364944458, |
|
"logps/chosen": -323.48577880859375, |
|
"logps/rejected": -288.4997863769531, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0014953254722058773, |
|
"rewards/margins": 0.00022950551647227257, |
|
"rewards/rejected": -0.0017248311778530478, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02617115938236064, |
|
"eval_logits/chosen": -3.136061429977417, |
|
"eval_logits/rejected": -3.1227903366088867, |
|
"eval_logps/chosen": -336.35565185546875, |
|
"eval_logps/rejected": -297.29937744140625, |
|
"eval_loss": 0.6928624510765076, |
|
"eval_rewards/accuracies": 0.5320000052452087, |
|
"eval_rewards/chosen": -0.0013669482432305813, |
|
"eval_rewards/margins": 0.0005784809472970665, |
|
"eval_rewards/rejected": -0.0019454291323199868, |
|
"eval_runtime": 305.21, |
|
"eval_samples_per_second": 6.553, |
|
"eval_steps_per_second": 0.819, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.028788275320596704, |
|
"grad_norm": 7.128358364105225, |
|
"learning_rate": 1.4360313315926893e-07, |
|
"logits/chosen": -3.166736125946045, |
|
"logits/rejected": -3.151339054107666, |
|
"logps/chosen": -353.5014343261719, |
|
"logps/rejected": -290.27734375, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.002510789781808853, |
|
"rewards/margins": -3.944762283936143e-06, |
|
"rewards/rejected": -0.0025068449322134256, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.031405391258832765, |
|
"grad_norm": 6.817880630493164, |
|
"learning_rate": 1.566579634464752e-07, |
|
"logits/chosen": -3.145479917526245, |
|
"logits/rejected": -3.143561601638794, |
|
"logps/chosen": -369.2731628417969, |
|
"logps/rejected": -329.998291015625, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -0.0027725263498723507, |
|
"rewards/margins": 0.0011518350802361965, |
|
"rewards/rejected": -0.003924361430108547, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03402250719706883, |
|
"grad_norm": 7.847947597503662, |
|
"learning_rate": 1.6971279373368143e-07, |
|
"logits/chosen": -3.109908103942871, |
|
"logits/rejected": -3.0981078147888184, |
|
"logps/chosen": -329.1286926269531, |
|
"logps/rejected": -307.6993713378906, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.00396283995360136, |
|
"rewards/margins": 0.0023776493035256863, |
|
"rewards/rejected": -0.006340488791465759, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.036639623135304895, |
|
"grad_norm": 8.320524215698242, |
|
"learning_rate": 1.8276762402088773e-07, |
|
"logits/chosen": -3.164989948272705, |
|
"logits/rejected": -3.1148316860198975, |
|
"logps/chosen": -355.7626037597656, |
|
"logps/rejected": -275.7682800292969, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.006009591277688742, |
|
"rewards/margins": 0.0014354930026456714, |
|
"rewards/rejected": -0.007445084396749735, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03925673907354096, |
|
"grad_norm": 7.911156177520752, |
|
"learning_rate": 1.95822454308094e-07, |
|
"logits/chosen": -3.152989625930786, |
|
"logits/rejected": -3.1499996185302734, |
|
"logps/chosen": -357.94586181640625, |
|
"logps/rejected": -294.5421142578125, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.006876949220895767, |
|
"rewards/margins": 0.00266222539357841, |
|
"rewards/rejected": -0.009539174847304821, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04187385501177702, |
|
"grad_norm": 8.252726554870605, |
|
"learning_rate": 2.0887728459530023e-07, |
|
"logits/chosen": -3.139591932296753, |
|
"logits/rejected": -3.137519359588623, |
|
"logps/chosen": -324.2300720214844, |
|
"logps/rejected": -310.9124450683594, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.012208414264023304, |
|
"rewards/margins": 0.002138238400220871, |
|
"rewards/rejected": -0.01434665359556675, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.04449097095001309, |
|
"grad_norm": 8.332549095153809, |
|
"learning_rate": 2.2193211488250652e-07, |
|
"logits/chosen": -3.1174330711364746, |
|
"logits/rejected": -3.1148269176483154, |
|
"logps/chosen": -286.5895690917969, |
|
"logps/rejected": -268.8719177246094, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.01521300245076418, |
|
"rewards/margins": 0.002711162669584155, |
|
"rewards/rejected": -0.01792416349053383, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.04710808688824915, |
|
"grad_norm": 6.830652713775635, |
|
"learning_rate": 2.349869451697128e-07, |
|
"logits/chosen": -3.139569044113159, |
|
"logits/rejected": -3.118330955505371, |
|
"logps/chosen": -325.8437805175781, |
|
"logps/rejected": -292.931884765625, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.019671550020575523, |
|
"rewards/margins": 0.0024939056020230055, |
|
"rewards/rejected": -0.022165456786751747, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04972520282648522, |
|
"grad_norm": 7.171481132507324, |
|
"learning_rate": 2.4804177545691903e-07, |
|
"logits/chosen": -3.167520046234131, |
|
"logits/rejected": -3.1782307624816895, |
|
"logps/chosen": -348.24981689453125, |
|
"logps/rejected": -295.1228942871094, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.01866580918431282, |
|
"rewards/margins": 0.0057288832031190395, |
|
"rewards/rejected": -0.024394694715738297, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"grad_norm": 6.543122291564941, |
|
"learning_rate": 2.610966057441253e-07, |
|
"logits/chosen": -3.1437888145446777, |
|
"logits/rejected": -3.152954578399658, |
|
"logps/chosen": -319.37432861328125, |
|
"logps/rejected": -268.0211181640625, |
|
"loss": 0.6887, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.026566719636321068, |
|
"rewards/margins": 0.009126237593591213, |
|
"rewards/rejected": -0.03569295257329941, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05234231876472128, |
|
"eval_logits/chosen": -3.13464617729187, |
|
"eval_logits/rejected": -3.1214706897735596, |
|
"eval_logps/chosen": -339.234130859375, |
|
"eval_logps/rejected": -300.934814453125, |
|
"eval_loss": 0.6891751885414124, |
|
"eval_rewards/accuracies": 0.6159999966621399, |
|
"eval_rewards/chosen": -0.03015170618891716, |
|
"eval_rewards/margins": 0.008148480206727982, |
|
"eval_rewards/rejected": -0.03830018267035484, |
|
"eval_runtime": 305.3758, |
|
"eval_samples_per_second": 6.549, |
|
"eval_steps_per_second": 0.819, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05495943470295734, |
|
"grad_norm": 6.885265827178955, |
|
"learning_rate": 2.7415143603133156e-07, |
|
"logits/chosen": -3.1693227291107178, |
|
"logits/rejected": -3.1635937690734863, |
|
"logps/chosen": -338.8418273925781, |
|
"logps/rejected": -288.22625732421875, |
|
"loss": 0.6876, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.0318625271320343, |
|
"rewards/margins": 0.01143469475209713, |
|
"rewards/rejected": -0.04329722374677658, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.05757655064119341, |
|
"grad_norm": 7.088402271270752, |
|
"learning_rate": 2.8720626631853785e-07, |
|
"logits/chosen": -3.113354206085205, |
|
"logits/rejected": -3.1317508220672607, |
|
"logps/chosen": -328.857421875, |
|
"logps/rejected": -287.6562805175781, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.04106331616640091, |
|
"rewards/margins": 0.01231370773166418, |
|
"rewards/rejected": -0.053377024829387665, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06019366657942947, |
|
"grad_norm": 7.345415115356445, |
|
"learning_rate": 3.002610966057441e-07, |
|
"logits/chosen": -3.194124937057495, |
|
"logits/rejected": -3.1830177307128906, |
|
"logps/chosen": -392.96978759765625, |
|
"logps/rejected": -345.01678466796875, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.048357121646404266, |
|
"rewards/margins": 0.011998703703284264, |
|
"rewards/rejected": -0.06035583093762398, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06281078251766553, |
|
"grad_norm": 7.639434814453125, |
|
"learning_rate": 3.133159268929504e-07, |
|
"logits/chosen": -3.146155834197998, |
|
"logits/rejected": -3.1524384021759033, |
|
"logps/chosen": -372.24835205078125, |
|
"logps/rejected": -348.30535888671875, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06389515846967697, |
|
"rewards/margins": 0.01166953518986702, |
|
"rewards/rejected": -0.07556469738483429, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.06542789845590159, |
|
"grad_norm": 8.062237739562988, |
|
"learning_rate": 3.263707571801567e-07, |
|
"logits/chosen": -3.1019375324249268, |
|
"logits/rejected": -3.125652313232422, |
|
"logps/chosen": -342.43292236328125, |
|
"logps/rejected": -297.35906982421875, |
|
"loss": 0.6848, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.06669998914003372, |
|
"rewards/margins": 0.017446473240852356, |
|
"rewards/rejected": -0.08414646238088608, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06804501439413765, |
|
"grad_norm": 7.048512935638428, |
|
"learning_rate": 3.3942558746736286e-07, |
|
"logits/chosen": -3.1437766551971436, |
|
"logits/rejected": -3.1343367099761963, |
|
"logps/chosen": -357.90447998046875, |
|
"logps/rejected": -309.511962890625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.08034952729940414, |
|
"rewards/margins": 0.018954290077090263, |
|
"rewards/rejected": -0.09930381923913956, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07066213033237373, |
|
"grad_norm": 6.4997453689575195, |
|
"learning_rate": 3.5248041775456916e-07, |
|
"logits/chosen": -3.1495137214660645, |
|
"logits/rejected": -3.1338560581207275, |
|
"logps/chosen": -339.3167419433594, |
|
"logps/rejected": -293.10089111328125, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0888824537396431, |
|
"rewards/margins": 0.02101912908256054, |
|
"rewards/rejected": -0.10990158468484879, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07327924627060979, |
|
"grad_norm": 7.67672872543335, |
|
"learning_rate": 3.6553524804177545e-07, |
|
"logits/chosen": -3.147304058074951, |
|
"logits/rejected": -3.116621494293213, |
|
"logps/chosen": -342.96771240234375, |
|
"logps/rejected": -300.9717102050781, |
|
"loss": 0.6834, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.09658826142549515, |
|
"rewards/margins": 0.021063242107629776, |
|
"rewards/rejected": -0.11765149980783463, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.07589636220884585, |
|
"grad_norm": 7.983977317810059, |
|
"learning_rate": 3.785900783289817e-07, |
|
"logits/chosen": -3.1366944313049316, |
|
"logits/rejected": -3.142000675201416, |
|
"logps/chosen": -365.61944580078125, |
|
"logps/rejected": -324.70709228515625, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0843677967786789, |
|
"rewards/margins": 0.031160688027739525, |
|
"rewards/rejected": -0.11552847921848297, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"grad_norm": 7.798567771911621, |
|
"learning_rate": 3.91644908616188e-07, |
|
"logits/chosen": -3.087780237197876, |
|
"logits/rejected": -3.045769214630127, |
|
"logps/chosen": -329.89642333984375, |
|
"logps/rejected": -295.91046142578125, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08805381506681442, |
|
"rewards/margins": 0.030454417690634727, |
|
"rewards/rejected": -0.11850825697183609, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07851347814708191, |
|
"eval_logits/chosen": -3.121612548828125, |
|
"eval_logits/rejected": -3.1093947887420654, |
|
"eval_logps/chosen": -344.1050720214844, |
|
"eval_logps/rejected": -307.97979736328125, |
|
"eval_loss": 0.6793686747550964, |
|
"eval_rewards/accuracies": 0.6359999775886536, |
|
"eval_rewards/chosen": -0.07886076718568802, |
|
"eval_rewards/margins": 0.029889002442359924, |
|
"eval_rewards/rejected": -0.10874976962804794, |
|
"eval_runtime": 305.2365, |
|
"eval_samples_per_second": 6.552, |
|
"eval_steps_per_second": 0.819, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08113059408531798, |
|
"grad_norm": 8.230989456176758, |
|
"learning_rate": 4.046997389033943e-07, |
|
"logits/chosen": -3.1827919483184814, |
|
"logits/rejected": -3.1549506187438965, |
|
"logps/chosen": -373.86859130859375, |
|
"logps/rejected": -295.2381591796875, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.06812898814678192, |
|
"rewards/margins": 0.04669738933444023, |
|
"rewards/rejected": -0.11482638120651245, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.08374771002355404, |
|
"grad_norm": 7.9239606857299805, |
|
"learning_rate": 4.1775456919060046e-07, |
|
"logits/chosen": -3.1656203269958496, |
|
"logits/rejected": -3.14418625831604, |
|
"logps/chosen": -340.9632873535156, |
|
"logps/rejected": -307.0127868652344, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0765252560377121, |
|
"rewards/margins": 0.03432226926088333, |
|
"rewards/rejected": -0.11084753274917603, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.08636482596179011, |
|
"grad_norm": 7.936288356781006, |
|
"learning_rate": 4.3080939947780675e-07, |
|
"logits/chosen": -3.1270527839660645, |
|
"logits/rejected": -3.1329689025878906, |
|
"logps/chosen": -338.80364990234375, |
|
"logps/rejected": -306.50103759765625, |
|
"loss": 0.678, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.08645441383123398, |
|
"rewards/margins": 0.03408702462911606, |
|
"rewards/rejected": -0.12054143846035004, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.08898194190002617, |
|
"grad_norm": 8.688923835754395, |
|
"learning_rate": 4.4386422976501305e-07, |
|
"logits/chosen": -3.165428638458252, |
|
"logits/rejected": -3.1660349369049072, |
|
"logps/chosen": -370.370361328125, |
|
"logps/rejected": -334.38140869140625, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.08854931592941284, |
|
"rewards/margins": 0.04290110990405083, |
|
"rewards/rejected": -0.13145044445991516, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09159905783826224, |
|
"grad_norm": 7.534700393676758, |
|
"learning_rate": 4.569190600522193e-07, |
|
"logits/chosen": -3.093208074569702, |
|
"logits/rejected": -3.0936062335968018, |
|
"logps/chosen": -376.98089599609375, |
|
"logps/rejected": -351.4696350097656, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.10356112569570541, |
|
"rewards/margins": 0.052223630249500275, |
|
"rewards/rejected": -0.1557847559452057, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.0942161737764983, |
|
"grad_norm": 6.478003978729248, |
|
"learning_rate": 4.699738903394256e-07, |
|
"logits/chosen": -3.131310224533081, |
|
"logits/rejected": -3.118582248687744, |
|
"logps/chosen": -317.2442932128906, |
|
"logps/rejected": -290.4730529785156, |
|
"loss": 0.6722, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.12776212394237518, |
|
"rewards/margins": 0.047861333936452866, |
|
"rewards/rejected": -0.17562346160411835, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.09683328971473436, |
|
"grad_norm": 9.120585441589355, |
|
"learning_rate": 4.830287206266319e-07, |
|
"logits/chosen": -3.106093406677246, |
|
"logits/rejected": -3.0930678844451904, |
|
"logps/chosen": -363.0904846191406, |
|
"logps/rejected": -299.1496276855469, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.10671161115169525, |
|
"rewards/margins": 0.07157553732395172, |
|
"rewards/rejected": -0.17828714847564697, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.09945040565297043, |
|
"grad_norm": 8.681933403015137, |
|
"learning_rate": 4.960835509138381e-07, |
|
"logits/chosen": -3.0945727825164795, |
|
"logits/rejected": -3.0602633953094482, |
|
"logps/chosen": -382.96539306640625, |
|
"logps/rejected": -332.73968505859375, |
|
"loss": 0.6631, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.10888388007879257, |
|
"rewards/margins": 0.06903685629367828, |
|
"rewards/rejected": -0.17792072892189026, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1020675215912065, |
|
"grad_norm": 10.902252197265625, |
|
"learning_rate": 4.999948856244767e-07, |
|
"logits/chosen": -3.0760512351989746, |
|
"logits/rejected": -3.0887162685394287, |
|
"logps/chosen": -368.3498840332031, |
|
"logps/rejected": -337.3893737792969, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.09421645104885101, |
|
"rewards/margins": 0.07887949794530869, |
|
"rewards/rejected": -0.1730959713459015, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"grad_norm": 8.231829643249512, |
|
"learning_rate": 4.999698361256577e-07, |
|
"logits/chosen": -3.104179859161377, |
|
"logits/rejected": -3.0953407287597656, |
|
"logps/chosen": -345.273681640625, |
|
"logps/rejected": -292.58612060546875, |
|
"loss": 0.6624, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.13569284975528717, |
|
"rewards/margins": 0.0712323784828186, |
|
"rewards/rejected": -0.20692522823810577, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10468463752944256, |
|
"eval_logits/chosen": -3.077139377593994, |
|
"eval_logits/rejected": -3.0663528442382812, |
|
"eval_logps/chosen": -354.28900146484375, |
|
"eval_logps/rejected": -322.285400390625, |
|
"eval_loss": 0.6634809970855713, |
|
"eval_rewards/accuracies": 0.6389999985694885, |
|
"eval_rewards/chosen": -0.18070009350776672, |
|
"eval_rewards/margins": 0.07110566645860672, |
|
"eval_rewards/rejected": -0.25180572271347046, |
|
"eval_runtime": 305.333, |
|
"eval_samples_per_second": 6.55, |
|
"eval_steps_per_second": 0.819, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.10730175346767862, |
|
"grad_norm": 8.956136703491211, |
|
"learning_rate": 4.99923914217458e-07, |
|
"logits/chosen": -3.0722298622131348, |
|
"logits/rejected": -3.0702052116394043, |
|
"logps/chosen": -326.5191955566406, |
|
"logps/rejected": -319.8614807128906, |
|
"loss": 0.6786, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.19534249603748322, |
|
"rewards/margins": 0.04017153009772301, |
|
"rewards/rejected": -0.23551401495933533, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.10991886940591468, |
|
"grad_norm": 11.84124755859375, |
|
"learning_rate": 4.99857123734344e-07, |
|
"logits/chosen": -3.0375618934631348, |
|
"logits/rejected": -3.0047786235809326, |
|
"logps/chosen": -316.19671630859375, |
|
"logps/rejected": -287.55462646484375, |
|
"loss": 0.6703, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.18900929391384125, |
|
"rewards/margins": 0.05846139043569565, |
|
"rewards/rejected": -0.2474706918001175, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11253598534415074, |
|
"grad_norm": 10.265412330627441, |
|
"learning_rate": 4.997694702533016e-07, |
|
"logits/chosen": -3.0566139221191406, |
|
"logits/rejected": -3.0296247005462646, |
|
"logps/chosen": -365.24237060546875, |
|
"logps/rejected": -334.05352783203125, |
|
"loss": 0.6575, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.13795217871665955, |
|
"rewards/margins": 0.08431630581617355, |
|
"rewards/rejected": -0.2222684919834137, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.11515310128238682, |
|
"grad_norm": 8.988418579101562, |
|
"learning_rate": 4.996609610933712e-07, |
|
"logits/chosen": -3.1079680919647217, |
|
"logits/rejected": -3.115506649017334, |
|
"logps/chosen": -354.38226318359375, |
|
"logps/rejected": -315.77880859375, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.10598504543304443, |
|
"rewards/margins": 0.06034703180193901, |
|
"rewards/rejected": -0.16633208096027374, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.11777021722062288, |
|
"grad_norm": 9.168876647949219, |
|
"learning_rate": 4.995316053150366e-07, |
|
"logits/chosen": -3.029470920562744, |
|
"logits/rejected": -3.0375399589538574, |
|
"logps/chosen": -347.7948303222656, |
|
"logps/rejected": -318.85504150390625, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.08917012065649033, |
|
"rewards/margins": 0.07738355547189713, |
|
"rewards/rejected": -0.16655369102954865, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12038733315885894, |
|
"grad_norm": 12.874235153198242, |
|
"learning_rate": 4.99381413719468e-07, |
|
"logits/chosen": -3.0497491359710693, |
|
"logits/rejected": -3.053278923034668, |
|
"logps/chosen": -356.35498046875, |
|
"logps/rejected": -337.4245300292969, |
|
"loss": 0.6446, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1851172149181366, |
|
"rewards/margins": 0.11571681499481201, |
|
"rewards/rejected": -0.3008340001106262, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.123004449097095, |
|
"grad_norm": 15.89233112335205, |
|
"learning_rate": 4.992103988476205e-07, |
|
"logits/chosen": -3.0434165000915527, |
|
"logits/rejected": -3.0270047187805176, |
|
"logps/chosen": -341.6517333984375, |
|
"logps/rejected": -321.88836669921875, |
|
"loss": 0.6467, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.2506132423877716, |
|
"rewards/margins": 0.11548835039138794, |
|
"rewards/rejected": -0.36610156297683716, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.12562156503533106, |
|
"grad_norm": 12.271796226501465, |
|
"learning_rate": 4.990185749791864e-07, |
|
"logits/chosen": -3.062100887298584, |
|
"logits/rejected": -3.052743673324585, |
|
"logps/chosen": -350.6018981933594, |
|
"logps/rejected": -336.20159912109375, |
|
"loss": 0.6468, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.20994243025779724, |
|
"rewards/margins": 0.11367367208003998, |
|
"rewards/rejected": -0.323616087436676, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.12823868097356714, |
|
"grad_norm": 16.589054107666016, |
|
"learning_rate": 4.988059581314039e-07, |
|
"logits/chosen": -3.0361151695251465, |
|
"logits/rejected": -3.0559310913085938, |
|
"logps/chosen": -389.7374267578125, |
|
"logps/rejected": -341.6214599609375, |
|
"loss": 0.6457, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.2314407378435135, |
|
"rewards/margins": 0.11587095260620117, |
|
"rewards/rejected": -0.34731167554855347, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"grad_norm": 11.054908752441406, |
|
"learning_rate": 4.985725660577184e-07, |
|
"logits/chosen": -3.0402045249938965, |
|
"logits/rejected": -3.028681755065918, |
|
"logps/chosen": -382.54754638671875, |
|
"logps/rejected": -331.24432373046875, |
|
"loss": 0.6373, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.29573333263397217, |
|
"rewards/margins": 0.13905784487724304, |
|
"rewards/rejected": -0.43479123711586, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13085579691180318, |
|
"eval_logits/chosen": -2.983909845352173, |
|
"eval_logits/rejected": -2.9692585468292236, |
|
"eval_logps/chosen": -366.0958557128906, |
|
"eval_logps/rejected": -338.3079833984375, |
|
"eval_loss": 0.6503274440765381, |
|
"eval_rewards/accuracies": 0.6424999833106995, |
|
"eval_rewards/chosen": -0.2987686097621918, |
|
"eval_rewards/margins": 0.11326280236244202, |
|
"eval_rewards/rejected": -0.4120314419269562, |
|
"eval_runtime": 305.3597, |
|
"eval_samples_per_second": 6.55, |
|
"eval_steps_per_second": 0.819, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13347291285003926, |
|
"grad_norm": 12.795198440551758, |
|
"learning_rate": 4.983184182463008e-07, |
|
"logits/chosen": -3.0068187713623047, |
|
"logits/rejected": -3.0001091957092285, |
|
"logps/chosen": -374.67767333984375, |
|
"logps/rejected": -341.01715087890625, |
|
"loss": 0.64, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.24733750522136688, |
|
"rewards/margins": 0.1369626820087433, |
|
"rewards/rejected": -0.384300172328949, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.1360900287882753, |
|
"grad_norm": 15.100224494934082, |
|
"learning_rate": 4.980435359184203e-07, |
|
"logits/chosen": -3.0251529216766357, |
|
"logits/rejected": -3.0229506492614746, |
|
"logps/chosen": -370.63250732421875, |
|
"logps/rejected": -348.49871826171875, |
|
"loss": 0.6491, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.27271249890327454, |
|
"rewards/margins": 0.11933918297290802, |
|
"rewards/rejected": -0.392051637172699, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.13870714472651138, |
|
"grad_norm": 13.007247924804688, |
|
"learning_rate": 4.977479420266723e-07, |
|
"logits/chosen": -2.944122791290283, |
|
"logits/rejected": -2.9631760120391846, |
|
"logps/chosen": -376.72235107421875, |
|
"logps/rejected": -384.7277526855469, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.37087589502334595, |
|
"rewards/margins": 0.13121145963668823, |
|
"rewards/rejected": -0.5020872950553894, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14132426066474746, |
|
"grad_norm": 12.603479385375977, |
|
"learning_rate": 4.974316612530614e-07, |
|
"logits/chosen": -2.947624683380127, |
|
"logits/rejected": -2.930647850036621, |
|
"logps/chosen": -396.7923889160156, |
|
"logps/rejected": -340.41363525390625, |
|
"loss": 0.6186, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4305369257926941, |
|
"rewards/margins": 0.18575596809387207, |
|
"rewards/rejected": -0.6162929534912109, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1439413766029835, |
|
"grad_norm": 15.142595291137695, |
|
"learning_rate": 4.970947200069415e-07, |
|
"logits/chosen": -2.9528539180755615, |
|
"logits/rejected": -2.966151237487793, |
|
"logps/chosen": -370.8870544433594, |
|
"logps/rejected": -354.8753967285156, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.2849995791912079, |
|
"rewards/margins": 0.11011602729558945, |
|
"rewards/rejected": -0.39511561393737793, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.14655849254121958, |
|
"grad_norm": 11.820856094360352, |
|
"learning_rate": 4.967371464228095e-07, |
|
"logits/chosen": -3.029913902282715, |
|
"logits/rejected": -3.0248889923095703, |
|
"logps/chosen": -353.63006591796875, |
|
"logps/rejected": -352.70806884765625, |
|
"loss": 0.6449, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.2146318405866623, |
|
"rewards/margins": 0.13045233488082886, |
|
"rewards/rejected": -0.34508416056632996, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.14917560847945563, |
|
"grad_norm": 12.829729080200195, |
|
"learning_rate": 4.963589703579569e-07, |
|
"logits/chosen": -3.086268186569214, |
|
"logits/rejected": -3.0621392726898193, |
|
"logps/chosen": -407.73565673828125, |
|
"logps/rejected": -370.2496337890625, |
|
"loss": 0.6455, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.29791611433029175, |
|
"rewards/margins": 0.135451078414917, |
|
"rewards/rejected": -0.43336719274520874, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1517927244176917, |
|
"grad_norm": 14.211217880249023, |
|
"learning_rate": 4.959602233899761e-07, |
|
"logits/chosen": -3.047476053237915, |
|
"logits/rejected": -3.0107288360595703, |
|
"logps/chosen": -416.2422790527344, |
|
"logps/rejected": -366.1787414550781, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.34379562735557556, |
|
"rewards/margins": 0.19932778179645538, |
|
"rewards/rejected": -0.5431233644485474, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.15440984035592778, |
|
"grad_norm": 14.383965492248535, |
|
"learning_rate": 4.955409388141243e-07, |
|
"logits/chosen": -2.9714608192443848, |
|
"logits/rejected": -2.9554789066314697, |
|
"logps/chosen": -369.77392578125, |
|
"logps/rejected": -347.66058349609375, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.44513049721717834, |
|
"rewards/margins": 0.15323522686958313, |
|
"rewards/rejected": -0.5983657240867615, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"grad_norm": 18.99432945251465, |
|
"learning_rate": 4.951011516405429e-07, |
|
"logits/chosen": -2.9896721839904785, |
|
"logits/rejected": -3.010812520980835, |
|
"logps/chosen": -368.553466796875, |
|
"logps/rejected": -354.1104736328125, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.4014170169830322, |
|
"rewards/margins": 0.15394745767116547, |
|
"rewards/rejected": -0.5553644895553589, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.15702695629416383, |
|
"eval_logits/chosen": -2.953789710998535, |
|
"eval_logits/rejected": -2.9371681213378906, |
|
"eval_logps/chosen": -375.1290588378906, |
|
"eval_logps/rejected": -350.5517578125, |
|
"eval_loss": 0.6456736326217651, |
|
"eval_rewards/accuracies": 0.637499988079071, |
|
"eval_rewards/chosen": -0.3891007900238037, |
|
"eval_rewards/margins": 0.1453685462474823, |
|
"eval_rewards/rejected": -0.5344693660736084, |
|
"eval_runtime": 305.2345, |
|
"eval_samples_per_second": 6.552, |
|
"eval_steps_per_second": 0.819, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1596440722323999, |
|
"grad_norm": 14.88294792175293, |
|
"learning_rate": 4.946408985913344e-07, |
|
"logits/chosen": -2.9678356647491455, |
|
"logits/rejected": -2.9549574851989746, |
|
"logps/chosen": -354.6939392089844, |
|
"logps/rejected": -326.92181396484375, |
|
"loss": 0.6583, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.3528710603713989, |
|
"rewards/margins": 0.1104646697640419, |
|
"rewards/rejected": -0.46333569288253784, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.16226118817063595, |
|
"grad_norm": 16.416210174560547, |
|
"learning_rate": 4.941602180974958e-07, |
|
"logits/chosen": -2.9832162857055664, |
|
"logits/rejected": -2.9431064128875732, |
|
"logps/chosen": -400.99798583984375, |
|
"logps/rejected": -325.7860412597656, |
|
"loss": 0.6386, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.3458956480026245, |
|
"rewards/margins": 0.15582728385925293, |
|
"rewards/rejected": -0.5017229318618774, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.16487830410887203, |
|
"grad_norm": 11.9800386428833, |
|
"learning_rate": 4.936591502957101e-07, |
|
"logits/chosen": -2.984358787536621, |
|
"logits/rejected": -2.9828975200653076, |
|
"logps/chosen": -356.4893493652344, |
|
"logps/rejected": -343.63519287109375, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.371735155582428, |
|
"rewards/margins": 0.22488275170326233, |
|
"rewards/rejected": -0.5966178178787231, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.16749542004710807, |
|
"grad_norm": 13.97966194152832, |
|
"learning_rate": 4.931377370249945e-07, |
|
"logits/chosen": -2.979612350463867, |
|
"logits/rejected": -2.942445993423462, |
|
"logps/chosen": -383.40643310546875, |
|
"logps/rejected": -351.91448974609375, |
|
"loss": 0.6306, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.4733237326145172, |
|
"rewards/margins": 0.18433848023414612, |
|
"rewards/rejected": -0.6576622128486633, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17011253598534415, |
|
"grad_norm": 15.059297561645508, |
|
"learning_rate": 4.925960218232072e-07, |
|
"logits/chosen": -2.9805855751037598, |
|
"logits/rejected": -2.963609218597412, |
|
"logps/chosen": -371.2182312011719, |
|
"logps/rejected": -376.56842041015625, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.4861178994178772, |
|
"rewards/margins": 0.249765545129776, |
|
"rewards/rejected": -0.7358834147453308, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.17272965192358022, |
|
"grad_norm": 17.203136444091797, |
|
"learning_rate": 4.920340499234116e-07, |
|
"logits/chosen": -2.945225477218628, |
|
"logits/rejected": -2.918910503387451, |
|
"logps/chosen": -391.84295654296875, |
|
"logps/rejected": -356.2134704589844, |
|
"loss": 0.6413, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5608336925506592, |
|
"rewards/margins": 0.1767912060022354, |
|
"rewards/rejected": -0.7376248836517334, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.17534676786181627, |
|
"grad_norm": 23.82184600830078, |
|
"learning_rate": 4.914518682500995e-07, |
|
"logits/chosen": -3.0318546295166016, |
|
"logits/rejected": -3.0210180282592773, |
|
"logps/chosen": -397.6226501464844, |
|
"logps/rejected": -371.94366455078125, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5073692202568054, |
|
"rewards/margins": 0.23156848549842834, |
|
"rewards/rejected": -0.7389377355575562, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.17796388380005235, |
|
"grad_norm": 12.960205078125, |
|
"learning_rate": 4.90849525415273e-07, |
|
"logits/chosen": -2.9876632690429688, |
|
"logits/rejected": -2.9763565063476562, |
|
"logps/chosen": -399.0164489746094, |
|
"logps/rejected": -358.02972412109375, |
|
"loss": 0.6074, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5352808833122253, |
|
"rewards/margins": 0.2567313313484192, |
|
"rewards/rejected": -0.7920122742652893, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.1805809997382884, |
|
"grad_norm": 16.679786682128906, |
|
"learning_rate": 4.902270717143858e-07, |
|
"logits/chosen": -2.982391119003296, |
|
"logits/rejected": -2.9801688194274902, |
|
"logps/chosen": -371.9320373535156, |
|
"logps/rejected": -395.40777587890625, |
|
"loss": 0.6047, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.6789665222167969, |
|
"rewards/margins": 0.27203455567359924, |
|
"rewards/rejected": -0.9510010480880737, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"grad_norm": 13.776201248168945, |
|
"learning_rate": 4.895845591221426e-07, |
|
"logits/chosen": -2.9419898986816406, |
|
"logits/rejected": -2.9705393314361572, |
|
"logps/chosen": -395.2472839355469, |
|
"logps/rejected": -399.77435302734375, |
|
"loss": 0.6266, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6857240796089172, |
|
"rewards/margins": 0.23093768954277039, |
|
"rewards/rejected": -0.9166617393493652, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18319811567652447, |
|
"eval_logits/chosen": -2.9229085445404053, |
|
"eval_logits/rejected": -2.9094736576080322, |
|
"eval_logps/chosen": -406.5211486816406, |
|
"eval_logps/rejected": -387.9122619628906, |
|
"eval_loss": 0.642038881778717, |
|
"eval_rewards/accuracies": 0.6365000009536743, |
|
"eval_rewards/chosen": -0.7030214667320251, |
|
"eval_rewards/margins": 0.205052450299263, |
|
"eval_rewards/rejected": -0.9080740213394165, |
|
"eval_runtime": 305.3295, |
|
"eval_samples_per_second": 6.55, |
|
"eval_steps_per_second": 0.819, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.18581523161476055, |
|
"grad_norm": 44.67826461791992, |
|
"learning_rate": 4.8892204128816e-07, |
|
"logits/chosen": -2.973836660385132, |
|
"logits/rejected": -2.9684879779815674, |
|
"logps/chosen": -407.85052490234375, |
|
"logps/rejected": -396.7626647949219, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6791498064994812, |
|
"rewards/margins": 0.1747795045375824, |
|
"rewards/rejected": -0.853929340839386, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.1884323475529966, |
|
"grad_norm": 22.158349990844727, |
|
"learning_rate": 4.882395735324863e-07, |
|
"logits/chosen": -2.9512436389923096, |
|
"logits/rejected": -2.902366876602173, |
|
"logps/chosen": -407.8587646484375, |
|
"logps/rejected": -385.2088317871094, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6286091804504395, |
|
"rewards/margins": 0.17438486218452454, |
|
"rewards/rejected": -0.8029941320419312, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.19104946349123267, |
|
"grad_norm": 15.196109771728516, |
|
"learning_rate": 4.875372128409829e-07, |
|
"logits/chosen": -2.939497232437134, |
|
"logits/rejected": -2.9162261486053467, |
|
"logps/chosen": -397.8129577636719, |
|
"logps/rejected": -357.2427978515625, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5556532144546509, |
|
"rewards/margins": 0.15908560156822205, |
|
"rewards/rejected": -0.7147387266159058, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.19366657942946872, |
|
"grad_norm": 13.985071182250977, |
|
"learning_rate": 4.868150178605653e-07, |
|
"logits/chosen": -2.9422688484191895, |
|
"logits/rejected": -2.945159435272217, |
|
"logps/chosen": -343.93597412109375, |
|
"logps/rejected": -314.3185119628906, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5319259166717529, |
|
"rewards/margins": 0.2180342972278595, |
|
"rewards/rejected": -0.74996018409729, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.1962836953677048, |
|
"grad_norm": 14.363373756408691, |
|
"learning_rate": 4.860730488943068e-07, |
|
"logits/chosen": -2.917142629623413, |
|
"logits/rejected": -2.9183247089385986, |
|
"logps/chosen": -356.16552734375, |
|
"logps/rejected": -352.13116455078125, |
|
"loss": 0.6178, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4881489872932434, |
|
"rewards/margins": 0.2087596356868744, |
|
"rewards/rejected": -0.6969085931777954, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.19890081130594087, |
|
"grad_norm": 16.34290885925293, |
|
"learning_rate": 4.853113678964021e-07, |
|
"logits/chosen": -2.939605712890625, |
|
"logits/rejected": -2.9561939239501953, |
|
"logps/chosen": -398.5882263183594, |
|
"logps/rejected": -393.08612060546875, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5295611619949341, |
|
"rewards/margins": 0.18238191306591034, |
|
"rewards/rejected": -0.7119430303573608, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.20151792724417691, |
|
"grad_norm": 13.591556549072266, |
|
"learning_rate": 4.845300384669957e-07, |
|
"logits/chosen": -2.9653282165527344, |
|
"logits/rejected": -2.9657387733459473, |
|
"logps/chosen": -367.00433349609375, |
|
"logps/rejected": -339.44781494140625, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.4500230848789215, |
|
"rewards/margins": 0.13824717700481415, |
|
"rewards/rejected": -0.5882702469825745, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.204135043182413, |
|
"grad_norm": 16.679380416870117, |
|
"learning_rate": 4.8372912584687e-07, |
|
"logits/chosen": -2.9903674125671387, |
|
"logits/rejected": -2.9671452045440674, |
|
"logps/chosen": -396.09393310546875, |
|
"logps/rejected": -374.65313720703125, |
|
"loss": 0.6298, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.3986968398094177, |
|
"rewards/margins": 0.19737406075000763, |
|
"rewards/rejected": -0.5960708856582642, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.20675215912064904, |
|
"grad_norm": 15.164281845092773, |
|
"learning_rate": 4.829086969119983e-07, |
|
"logits/chosen": -2.9598774909973145, |
|
"logits/rejected": -2.9876530170440674, |
|
"logps/chosen": -361.7220153808594, |
|
"logps/rejected": -362.77734375, |
|
"loss": 0.6576, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.4737739562988281, |
|
"rewards/margins": 0.13218006491661072, |
|
"rewards/rejected": -0.6059540510177612, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"grad_norm": 14.60089111328125, |
|
"learning_rate": 4.820688201679605e-07, |
|
"logits/chosen": -3.0159783363342285, |
|
"logits/rejected": -2.9888081550598145, |
|
"logps/chosen": -381.7862243652344, |
|
"logps/rejected": -319.8965759277344, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.4121805727481842, |
|
"rewards/margins": 0.2772974371910095, |
|
"rewards/rejected": -0.6894780397415161, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.2093692750588851, |
|
"eval_logits/chosen": -2.939723253250122, |
|
"eval_logits/rejected": -2.925475835800171, |
|
"eval_logps/chosen": -385.91180419921875, |
|
"eval_logps/rejected": -364.74835205078125, |
|
"eval_loss": 0.6367480754852295, |
|
"eval_rewards/accuracies": 0.6474999785423279, |
|
"eval_rewards/chosen": -0.4969281554222107, |
|
"eval_rewards/margins": 0.17950665950775146, |
|
"eval_rewards/rejected": -0.6764348149299622, |
|
"eval_runtime": 305.2085, |
|
"eval_samples_per_second": 6.553, |
|
"eval_steps_per_second": 0.819, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21198639099712116, |
|
"grad_norm": 12.582085609436035, |
|
"learning_rate": 4.812095657442231e-07, |
|
"logits/chosen": -2.991004467010498, |
|
"logits/rejected": -3.030839204788208, |
|
"logps/chosen": -396.093017578125, |
|
"logps/rejected": -396.8296203613281, |
|
"loss": 0.6504, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.5232123732566833, |
|
"rewards/margins": 0.1550460159778595, |
|
"rewards/rejected": -0.6782584190368652, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.21460350693535724, |
|
"grad_norm": 16.284395217895508, |
|
"learning_rate": 4.803310053882831e-07, |
|
"logits/chosen": -2.9644618034362793, |
|
"logits/rejected": -2.983457088470459, |
|
"logps/chosen": -342.4942321777344, |
|
"logps/rejected": -368.9544372558594, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5437088012695312, |
|
"rewards/margins": 0.18539607524871826, |
|
"rewards/rejected": -0.7291048765182495, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.2172206228735933, |
|
"grad_norm": 16.87338638305664, |
|
"learning_rate": 4.794332124596775e-07, |
|
"logits/chosen": -2.9662275314331055, |
|
"logits/rejected": -2.9847869873046875, |
|
"logps/chosen": -412.5888671875, |
|
"logps/rejected": -409.6481018066406, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.606338381767273, |
|
"rewards/margins": 0.19459688663482666, |
|
"rewards/rejected": -0.8009351491928101, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.21983773881182936, |
|
"grad_norm": 18.873977661132812, |
|
"learning_rate": 4.785162619238574e-07, |
|
"logits/chosen": -2.9262964725494385, |
|
"logits/rejected": -2.9126768112182617, |
|
"logps/chosen": -387.4312438964844, |
|
"logps/rejected": -363.56671142578125, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.6643389463424683, |
|
"rewards/margins": 0.21009120345115662, |
|
"rewards/rejected": -0.8744300603866577, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.22245485475006543, |
|
"grad_norm": 22.909137725830078, |
|
"learning_rate": 4.775802303459287e-07, |
|
"logits/chosen": -2.9198760986328125, |
|
"logits/rejected": -2.91233491897583, |
|
"logps/chosen": -391.82257080078125, |
|
"logps/rejected": -389.66168212890625, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6800801157951355, |
|
"rewards/margins": 0.21083597838878632, |
|
"rewards/rejected": -0.8909161686897278, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.22507197068830148, |
|
"grad_norm": 17.654157638549805, |
|
"learning_rate": 4.766251958842589e-07, |
|
"logits/chosen": -2.899445056915283, |
|
"logits/rejected": -2.8913986682891846, |
|
"logps/chosen": -407.30523681640625, |
|
"logps/rejected": -389.7769775390625, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.6278396844863892, |
|
"rewards/margins": 0.1948491483926773, |
|
"rewards/rejected": -0.82268887758255, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.22768908662653756, |
|
"grad_norm": 23.719482421875, |
|
"learning_rate": 4.756512382839506e-07, |
|
"logits/chosen": -2.8611950874328613, |
|
"logits/rejected": -2.8404242992401123, |
|
"logps/chosen": -392.8919982910156, |
|
"logps/rejected": -398.45172119140625, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6410590410232544, |
|
"rewards/margins": 0.19700810313224792, |
|
"rewards/rejected": -0.8380670547485352, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.23030620256477363, |
|
"grad_norm": 19.95950698852539, |
|
"learning_rate": 4.746584388701831e-07, |
|
"logits/chosen": -2.9077162742614746, |
|
"logits/rejected": -2.902599334716797, |
|
"logps/chosen": -403.0314636230469, |
|
"logps/rejected": -388.3122253417969, |
|
"loss": 0.6378, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6414380073547363, |
|
"rewards/margins": 0.19418500363826752, |
|
"rewards/rejected": -0.8356229662895203, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.23292331850300968, |
|
"grad_norm": 20.028629302978516, |
|
"learning_rate": 4.736468805414218e-07, |
|
"logits/chosen": -2.889512538909912, |
|
"logits/rejected": -2.907574415206909, |
|
"logps/chosen": -376.83734130859375, |
|
"logps/rejected": -398.76568603515625, |
|
"loss": 0.6141, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5963784456253052, |
|
"rewards/margins": 0.25601688027381897, |
|
"rewards/rejected": -0.852395236492157, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"grad_norm": 22.77333641052246, |
|
"learning_rate": 4.7261664776249595e-07, |
|
"logits/chosen": -2.8576667308807373, |
|
"logits/rejected": -2.837547779083252, |
|
"logps/chosen": -346.2492980957031, |
|
"logps/rejected": -343.90350341796875, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.5129637122154236, |
|
"rewards/margins": 0.22999227046966553, |
|
"rewards/rejected": -0.7429560422897339, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.23554043444124576, |
|
"eval_logits/chosen": -2.8992178440093994, |
|
"eval_logits/rejected": -2.881546974182129, |
|
"eval_logps/chosen": -390.1064758300781, |
|
"eval_logps/rejected": -371.53509521484375, |
|
"eval_loss": 0.6330006122589111, |
|
"eval_rewards/accuracies": 0.6545000076293945, |
|
"eval_rewards/chosen": -0.5388749837875366, |
|
"eval_rewards/margins": 0.20542745292186737, |
|
"eval_rewards/rejected": -0.7443024516105652, |
|
"eval_runtime": 305.2957, |
|
"eval_samples_per_second": 6.551, |
|
"eval_steps_per_second": 0.819, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.2381575503794818, |
|
"grad_norm": 16.45803451538086, |
|
"learning_rate": 4.7156782655754624e-07, |
|
"logits/chosen": -2.96109938621521, |
|
"logits/rejected": -2.9101853370666504, |
|
"logps/chosen": -412.92095947265625, |
|
"logps/rejected": -353.53106689453125, |
|
"loss": 0.638, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.5114455223083496, |
|
"rewards/margins": 0.19183678925037384, |
|
"rewards/rejected": -0.703282356262207, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.24077466631771788, |
|
"grad_norm": 14.590066909790039, |
|
"learning_rate": 4.705005045028414e-07, |
|
"logits/chosen": -2.8991317749023438, |
|
"logits/rejected": -2.8741579055786133, |
|
"logps/chosen": -404.0238952636719, |
|
"logps/rejected": -386.28668212890625, |
|
"loss": 0.6204, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6287791132926941, |
|
"rewards/margins": 0.23485076427459717, |
|
"rewards/rejected": -0.863629937171936, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.24339178225595393, |
|
"grad_norm": 21.144271850585938, |
|
"learning_rate": 4.694147707194659e-07, |
|
"logits/chosen": -2.976510524749756, |
|
"logits/rejected": -2.9652724266052246, |
|
"logps/chosen": -416.43731689453125, |
|
"logps/rejected": -405.408935546875, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.7039724588394165, |
|
"rewards/margins": 0.302602618932724, |
|
"rewards/rejected": -1.0065749883651733, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.24600889819419, |
|
"grad_norm": 19.868425369262695, |
|
"learning_rate": 4.683107158658781e-07, |
|
"logits/chosen": -2.8925671577453613, |
|
"logits/rejected": -2.888092517852783, |
|
"logps/chosen": -431.69171142578125, |
|
"logps/rejected": -417.070556640625, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6716684103012085, |
|
"rewards/margins": 0.3120590150356293, |
|
"rewards/rejected": -0.9837274551391602, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.24862601413242608, |
|
"grad_norm": 25.779151916503906, |
|
"learning_rate": 4.6718843213034066e-07, |
|
"logits/chosen": -2.9175336360931396, |
|
"logits/rejected": -2.9109795093536377, |
|
"logps/chosen": -401.5478515625, |
|
"logps/rejected": -391.0757141113281, |
|
"loss": 0.6248, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.8010644912719727, |
|
"rewards/margins": 0.26846641302108765, |
|
"rewards/rejected": -1.0695308446884155, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.2512431300706621, |
|
"grad_norm": 19.95328712463379, |
|
"learning_rate": 4.660480132232224e-07, |
|
"logits/chosen": -2.910229206085205, |
|
"logits/rejected": -2.9196810722351074, |
|
"logps/chosen": -433.5536193847656, |
|
"logps/rejected": -417.9415588378906, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8831882476806641, |
|
"rewards/margins": 0.22323890030384064, |
|
"rewards/rejected": -1.1064269542694092, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.25386024600889817, |
|
"grad_norm": 23.01506996154785, |
|
"learning_rate": 4.64889554369174e-07, |
|
"logits/chosen": -2.9168238639831543, |
|
"logits/rejected": -2.8963983058929443, |
|
"logps/chosen": -433.3750915527344, |
|
"logps/rejected": -402.49578857421875, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8203709721565247, |
|
"rewards/margins": 0.3379738926887512, |
|
"rewards/rejected": -1.1583448648452759, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.2564773619471343, |
|
"grad_norm": 21.516292572021484, |
|
"learning_rate": 4.637131522991764e-07, |
|
"logits/chosen": -2.8918282985687256, |
|
"logits/rejected": -2.890716552734375, |
|
"logps/chosen": -435.3067932128906, |
|
"logps/rejected": -429.76068115234375, |
|
"loss": 0.6116, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8054723739624023, |
|
"rewards/margins": 0.27397674322128296, |
|
"rewards/rejected": -1.0794490575790405, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.2590944778853703, |
|
"grad_norm": 18.14751625061035, |
|
"learning_rate": 4.6251890524246375e-07, |
|
"logits/chosen": -2.9248886108398438, |
|
"logits/rejected": -2.9085328578948975, |
|
"logps/chosen": -389.9818115234375, |
|
"logps/rejected": -382.972900390625, |
|
"loss": 0.5939, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.819879412651062, |
|
"rewards/margins": 0.344366192817688, |
|
"rewards/rejected": -1.16424560546875, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"grad_norm": 18.19559097290039, |
|
"learning_rate": 4.613069129183218e-07, |
|
"logits/chosen": -2.947519063949585, |
|
"logits/rejected": -2.893505811691284, |
|
"logps/chosen": -472.57659912109375, |
|
"logps/rejected": -439.01263427734375, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.858264148235321, |
|
"rewards/margins": 0.26358070969581604, |
|
"rewards/rejected": -1.121845006942749, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26171159382360637, |
|
"eval_logits/chosen": -2.8664913177490234, |
|
"eval_logits/rejected": -2.846865177154541, |
|
"eval_logps/chosen": -428.9975280761719, |
|
"eval_logps/rejected": -414.9855041503906, |
|
"eval_loss": 0.6271011829376221, |
|
"eval_rewards/accuracies": 0.6460000276565552, |
|
"eval_rewards/chosen": -0.9277856349945068, |
|
"eval_rewards/margins": 0.2510209381580353, |
|
"eval_rewards/rejected": -1.1788065433502197, |
|
"eval_runtime": 305.3271, |
|
"eval_samples_per_second": 6.55, |
|
"eval_steps_per_second": 0.819, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.2643287097618425, |
|
"grad_norm": 28.633275985717773, |
|
"learning_rate": 4.6007727652776065e-07, |
|
"logits/chosen": -2.8592729568481445, |
|
"logits/rejected": -2.8527398109436035, |
|
"logps/chosen": -399.6195983886719, |
|
"logps/rejected": -404.9161071777344, |
|
"loss": 0.6108, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9239141345024109, |
|
"rewards/margins": 0.2775779664516449, |
|
"rewards/rejected": -1.2014920711517334, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2669458257000785, |
|
"grad_norm": 16.247283935546875, |
|
"learning_rate": 4.588300987450652e-07, |
|
"logits/chosen": -2.925482749938965, |
|
"logits/rejected": -2.9205174446105957, |
|
"logps/chosen": -404.5378723144531, |
|
"logps/rejected": -372.3696594238281, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8224859237670898, |
|
"rewards/margins": 0.25445401668548584, |
|
"rewards/rejected": -1.0769398212432861, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.26956294163831457, |
|
"grad_norm": 15.247428894042969, |
|
"learning_rate": 4.5756548370922134e-07, |
|
"logits/chosen": -2.8664026260375977, |
|
"logits/rejected": -2.852753162384033, |
|
"logps/chosen": -376.1838073730469, |
|
"logps/rejected": -374.4062194824219, |
|
"loss": 0.6468, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.6621295213699341, |
|
"rewards/margins": 0.20799696445465088, |
|
"rewards/rejected": -0.8701265454292297, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.2721800575765506, |
|
"grad_norm": 26.429670333862305, |
|
"learning_rate": 4.5628353701522047e-07, |
|
"logits/chosen": -2.8901050090789795, |
|
"logits/rejected": -2.8922314643859863, |
|
"logps/chosen": -440.0348205566406, |
|
"logps/rejected": -419.0166015625, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5985369086265564, |
|
"rewards/margins": 0.2978154718875885, |
|
"rewards/rejected": -0.8963524699211121, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2747971735147867, |
|
"grad_norm": 16.453901290893555, |
|
"learning_rate": 4.549843657052429e-07, |
|
"logits/chosen": -2.913086414337158, |
|
"logits/rejected": -2.9038262367248535, |
|
"logps/chosen": -408.3675842285156, |
|
"logps/rejected": -414.268310546875, |
|
"loss": 0.5836, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6507371664047241, |
|
"rewards/margins": 0.3384549021720886, |
|
"rewards/rejected": -0.9891921281814575, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.27741428945302277, |
|
"grad_norm": 20.94676971435547, |
|
"learning_rate": 4.5366807825971907e-07, |
|
"logits/chosen": -2.854055643081665, |
|
"logits/rejected": -2.854862928390503, |
|
"logps/chosen": -393.8934020996094, |
|
"logps/rejected": -386.7836608886719, |
|
"loss": 0.6529, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8595059514045715, |
|
"rewards/margins": 0.21301527321338654, |
|
"rewards/rejected": -1.0725212097167969, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2800314053912588, |
|
"grad_norm": 19.222152709960938, |
|
"learning_rate": 4.5233478458827176e-07, |
|
"logits/chosen": -2.895177125930786, |
|
"logits/rejected": -2.871817111968994, |
|
"logps/chosen": -451.8866271972656, |
|
"logps/rejected": -412.9781188964844, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8334499597549438, |
|
"rewards/margins": 0.3665994703769684, |
|
"rewards/rejected": -1.2000494003295898, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.2826485213294949, |
|
"grad_norm": 18.82132339477539, |
|
"learning_rate": 4.509845960205389e-07, |
|
"logits/chosen": -2.8257076740264893, |
|
"logits/rejected": -2.8204989433288574, |
|
"logps/chosen": -429.1475524902344, |
|
"logps/rejected": -408.2667541503906, |
|
"loss": 0.6282, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.771919846534729, |
|
"rewards/margins": 0.25145334005355835, |
|
"rewards/rejected": -1.0233732461929321, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.28526563726773096, |
|
"grad_norm": 22.576290130615234, |
|
"learning_rate": 4.4961762529687736e-07, |
|
"logits/chosen": -2.8405702114105225, |
|
"logits/rejected": -2.8226046562194824, |
|
"logps/chosen": -415.6966857910156, |
|
"logps/rejected": -397.05230712890625, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.814344048500061, |
|
"rewards/margins": 0.15356814861297607, |
|
"rewards/rejected": -0.9679121971130371, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"grad_norm": 26.202625274658203, |
|
"learning_rate": 4.482339865589492e-07, |
|
"logits/chosen": -2.860849142074585, |
|
"logits/rejected": -2.819664239883423, |
|
"logps/chosen": -419.00335693359375, |
|
"logps/rejected": -368.59674072265625, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.829715371131897, |
|
"rewards/margins": 0.14634691178798676, |
|
"rewards/rejected": -0.9760621786117554, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.287882753205967, |
|
"eval_logits/chosen": -2.83473801612854, |
|
"eval_logits/rejected": -2.8143739700317383, |
|
"eval_logps/chosen": -416.0617980957031, |
|
"eval_logps/rejected": -400.14886474609375, |
|
"eval_loss": 0.6234466433525085, |
|
"eval_rewards/accuracies": 0.6514999866485596, |
|
"eval_rewards/chosen": -0.7984281182289124, |
|
"eval_rewards/margins": 0.23201218247413635, |
|
"eval_rewards/rejected": -1.0304402112960815, |
|
"eval_runtime": 305.573, |
|
"eval_samples_per_second": 6.545, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.2904998691442031, |
|
"grad_norm": 21.07489776611328, |
|
"learning_rate": 4.4683379534019076e-07, |
|
"logits/chosen": -2.883356809616089, |
|
"logits/rejected": -2.8841772079467773, |
|
"logps/chosen": -424.1853942871094, |
|
"logps/rejected": -418.2691345214844, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7898249626159668, |
|
"rewards/margins": 0.19554057717323303, |
|
"rewards/rejected": -0.9853655099868774, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.29311698508243916, |
|
"grad_norm": 21.048519134521484, |
|
"learning_rate": 4.4541716855616593e-07, |
|
"logits/chosen": -2.821552038192749, |
|
"logits/rejected": -2.7955610752105713, |
|
"logps/chosen": -392.34136962890625, |
|
"logps/rejected": -401.60382080078125, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7865413427352905, |
|
"rewards/margins": 0.277415007352829, |
|
"rewards/rejected": -1.0639564990997314, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.2957341010206752, |
|
"grad_norm": 16.305269241333008, |
|
"learning_rate": 4.4398422449480357e-07, |
|
"logits/chosen": -2.7998039722442627, |
|
"logits/rejected": -2.76208758354187, |
|
"logps/chosen": -412.30511474609375, |
|
"logps/rejected": -412.4529724121094, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8083792924880981, |
|
"rewards/margins": 0.19571712613105774, |
|
"rewards/rejected": -1.0040963888168335, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.29835121695891126, |
|
"grad_norm": 19.82772445678711, |
|
"learning_rate": 4.4253508280652036e-07, |
|
"logits/chosen": -2.8314437866210938, |
|
"logits/rejected": -2.7757253646850586, |
|
"logps/chosen": -435.48638916015625, |
|
"logps/rejected": -384.79669189453125, |
|
"loss": 0.6123, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.6878252029418945, |
|
"rewards/margins": 0.2673383355140686, |
|
"rewards/rejected": -0.9551635980606079, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.30096833289714736, |
|
"grad_norm": 16.961671829223633, |
|
"learning_rate": 4.410698644942302e-07, |
|
"logits/chosen": -2.8539395332336426, |
|
"logits/rejected": -2.839357614517212, |
|
"logps/chosen": -425.7850036621094, |
|
"logps/rejected": -408.3335266113281, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7383579611778259, |
|
"rewards/margins": 0.29951414465904236, |
|
"rewards/rejected": -1.037872076034546, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.3035854488353834, |
|
"grad_norm": 18.105398178100586, |
|
"learning_rate": 4.3958869190324057e-07, |
|
"logits/chosen": -2.784026622772217, |
|
"logits/rejected": -2.7167916297912598, |
|
"logps/chosen": -432.7950744628906, |
|
"logps/rejected": -418.3269958496094, |
|
"loss": 0.6018, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9281485676765442, |
|
"rewards/margins": 0.3167566657066345, |
|
"rewards/rejected": -1.2449051141738892, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.30620256477361946, |
|
"grad_norm": 20.290996551513672, |
|
"learning_rate": 4.380916887110365e-07, |
|
"logits/chosen": -2.844586133956909, |
|
"logits/rejected": -2.80912446975708, |
|
"logps/chosen": -451.59234619140625, |
|
"logps/rejected": -413.44390869140625, |
|
"loss": 0.6339, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2258434295654297, |
|
"rewards/margins": 0.2452480047941208, |
|
"rewards/rejected": -1.4710915088653564, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.30881968071185556, |
|
"grad_norm": 27.33935546875, |
|
"learning_rate": 4.3657897991695394e-07, |
|
"logits/chosen": -2.7094969749450684, |
|
"logits/rejected": -2.780738115310669, |
|
"logps/chosen": -440.80987548828125, |
|
"logps/rejected": -465.4100646972656, |
|
"loss": 0.6284, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2238306999206543, |
|
"rewards/margins": 0.2834627032279968, |
|
"rewards/rejected": -1.5072933435440063, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.3114367966500916, |
|
"grad_norm": 19.233346939086914, |
|
"learning_rate": 4.350506918317416e-07, |
|
"logits/chosen": -2.8551743030548096, |
|
"logits/rejected": -2.819492816925049, |
|
"logps/chosen": -427.6788635253906, |
|
"logps/rejected": -432.52703857421875, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1010067462921143, |
|
"rewards/margins": 0.2940807640552521, |
|
"rewards/rejected": -1.395087480545044, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"grad_norm": 32.360862731933594, |
|
"learning_rate": 4.335069520670149e-07, |
|
"logits/chosen": -2.813873291015625, |
|
"logits/rejected": -2.778324604034424, |
|
"logps/chosen": -412.0577087402344, |
|
"logps/rejected": -414.05462646484375, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.132204294204712, |
|
"rewards/margins": 0.14098653197288513, |
|
"rewards/rejected": -1.2731907367706299, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.31405391258832765, |
|
"eval_logits/chosen": -2.821247100830078, |
|
"eval_logits/rejected": -2.7994351387023926, |
|
"eval_logps/chosen": -439.2535705566406, |
|
"eval_logps/rejected": -428.8004150390625, |
|
"eval_loss": 0.6152091026306152, |
|
"eval_rewards/accuracies": 0.6570000052452087, |
|
"eval_rewards/chosen": -1.0303457975387573, |
|
"eval_rewards/margins": 0.2866101562976837, |
|
"eval_rewards/rejected": -1.3169556856155396, |
|
"eval_runtime": 305.4997, |
|
"eval_samples_per_second": 6.547, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3166710285265637, |
|
"grad_norm": 20.405153274536133, |
|
"learning_rate": 4.319478895245999e-07, |
|
"logits/chosen": -2.8161978721618652, |
|
"logits/rejected": -2.7879137992858887, |
|
"logps/chosen": -414.6759338378906, |
|
"logps/rejected": -396.0447998046875, |
|
"loss": 0.6088, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9484050869941711, |
|
"rewards/margins": 0.28157109022140503, |
|
"rewards/rejected": -1.2299760580062866, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.3192881444647998, |
|
"grad_norm": 21.530492782592773, |
|
"learning_rate": 4.3037363438577036e-07, |
|
"logits/chosen": -2.8707480430603027, |
|
"logits/rejected": -2.8560750484466553, |
|
"logps/chosen": -423.5750427246094, |
|
"logps/rejected": -442.72576904296875, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9034959673881531, |
|
"rewards/margins": 0.23671674728393555, |
|
"rewards/rejected": -1.1402127742767334, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.32190526040303585, |
|
"grad_norm": 19.900814056396484, |
|
"learning_rate": 4.2878431810037716e-07, |
|
"logits/chosen": -2.8587565422058105, |
|
"logits/rejected": -2.8229823112487793, |
|
"logps/chosen": -477.77374267578125, |
|
"logps/rejected": -438.41082763671875, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.9826862215995789, |
|
"rewards/margins": 0.3427571952342987, |
|
"rewards/rejected": -1.3254432678222656, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.3245223763412719, |
|
"grad_norm": 16.341970443725586, |
|
"learning_rate": 4.271800733758729e-07, |
|
"logits/chosen": -2.7875592708587646, |
|
"logits/rejected": -2.7579915523529053, |
|
"logps/chosen": -480.4039611816406, |
|
"logps/rejected": -449.5467834472656, |
|
"loss": 0.6178, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1455281972885132, |
|
"rewards/margins": 0.32378047704696655, |
|
"rewards/rejected": -1.4693087339401245, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.327139492279508, |
|
"grad_norm": 17.49270248413086, |
|
"learning_rate": 4.255610341662304e-07, |
|
"logits/chosen": -2.830955982208252, |
|
"logits/rejected": -2.783357858657837, |
|
"logps/chosen": -442.563720703125, |
|
"logps/rejected": -430.7234802246094, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.118556261062622, |
|
"rewards/margins": 0.31357699632644653, |
|
"rewards/rejected": -1.4321330785751343, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.32975660821774405, |
|
"grad_norm": 15.970780372619629, |
|
"learning_rate": 4.2392733566075757e-07, |
|
"logits/chosen": -2.822460174560547, |
|
"logits/rejected": -2.818441867828369, |
|
"logps/chosen": -425.0038146972656, |
|
"logps/rejected": -420.7904357910156, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9978582262992859, |
|
"rewards/margins": 0.23995347321033478, |
|
"rewards/rejected": -1.237811803817749, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.3323737241559801, |
|
"grad_norm": 23.021156311035156, |
|
"learning_rate": 4.2227911427280973e-07, |
|
"logits/chosen": -2.8251795768737793, |
|
"logits/rejected": -2.798708200454712, |
|
"logps/chosen": -408.9261779785156, |
|
"logps/rejected": -386.421875, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.916782557964325, |
|
"rewards/margins": 0.2843112349510193, |
|
"rewards/rejected": -1.2010937929153442, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.33499084009421615, |
|
"grad_norm": 22.88947105407715, |
|
"learning_rate": 4.206165076283982e-07, |
|
"logits/chosen": -2.8256075382232666, |
|
"logits/rejected": -2.8119583129882812, |
|
"logps/chosen": -417.31341552734375, |
|
"logps/rejected": -419.28680419921875, |
|
"loss": 0.5893, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0365794897079468, |
|
"rewards/margins": 0.34971266984939575, |
|
"rewards/rejected": -1.3862922191619873, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.33760795603245225, |
|
"grad_norm": 20.84588623046875, |
|
"learning_rate": 4.1893965455469946e-07, |
|
"logits/chosen": -2.7863240242004395, |
|
"logits/rejected": -2.772761583328247, |
|
"logps/chosen": -448.05792236328125, |
|
"logps/rejected": -433.765869140625, |
|
"loss": 0.6644, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.2257723808288574, |
|
"rewards/margins": 0.2406727820634842, |
|
"rewards/rejected": -1.4664452075958252, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"grad_norm": 18.72652816772461, |
|
"learning_rate": 4.172486950684626e-07, |
|
"logits/chosen": -2.824350118637085, |
|
"logits/rejected": -2.8358230590820312, |
|
"logps/chosen": -438.16900634765625, |
|
"logps/rejected": -455.0042419433594, |
|
"loss": 0.5967, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.146238088607788, |
|
"rewards/margins": 0.3505471646785736, |
|
"rewards/rejected": -1.4967854022979736, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.3402250719706883, |
|
"eval_logits/chosen": -2.7756471633911133, |
|
"eval_logits/rejected": -2.7494444847106934, |
|
"eval_logps/chosen": -459.6400146484375, |
|
"eval_logps/rejected": -450.31976318359375, |
|
"eval_loss": 0.6130924224853516, |
|
"eval_rewards/accuracies": 0.6654999852180481, |
|
"eval_rewards/chosen": -1.2342103719711304, |
|
"eval_rewards/margins": 0.29793861508369446, |
|
"eval_rewards/rejected": -1.5321489572525024, |
|
"eval_runtime": 305.5502, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.34284218790892435, |
|
"grad_norm": 20.80463981628418, |
|
"learning_rate": 4.155437703643181e-07, |
|
"logits/chosen": -2.84661602973938, |
|
"logits/rejected": -2.8071157932281494, |
|
"logps/chosen": -431.5113830566406, |
|
"logps/rejected": -413.513916015625, |
|
"loss": 0.6008, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1648436784744263, |
|
"rewards/margins": 0.32154667377471924, |
|
"rewards/rejected": -1.486390233039856, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.34545930384716045, |
|
"grad_norm": 22.277143478393555, |
|
"learning_rate": 4.138250228029881e-07, |
|
"logits/chosen": -2.8156943321228027, |
|
"logits/rejected": -2.802820920944214, |
|
"logps/chosen": -443.06024169921875, |
|
"logps/rejected": -460.61663818359375, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1023852825164795, |
|
"rewards/margins": 0.26114708185195923, |
|
"rewards/rejected": -1.363532304763794, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.3480764197853965, |
|
"grad_norm": 21.171588897705078, |
|
"learning_rate": 4.1209259589939935e-07, |
|
"logits/chosen": -2.7955193519592285, |
|
"logits/rejected": -2.804624319076538, |
|
"logps/chosen": -397.19091796875, |
|
"logps/rejected": -402.78857421875, |
|
"loss": 0.6281, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0586354732513428, |
|
"rewards/margins": 0.24395787715911865, |
|
"rewards/rejected": -1.302593469619751, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.35069353572363254, |
|
"grad_norm": 32.8704719543457, |
|
"learning_rate": 4.103466343106998e-07, |
|
"logits/chosen": -2.846787929534912, |
|
"logits/rejected": -2.852377414703369, |
|
"logps/chosen": -447.3720703125, |
|
"logps/rejected": -427.18853759765625, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0319609642028809, |
|
"rewards/margins": 0.21338698267936707, |
|
"rewards/rejected": -1.2453479766845703, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.35331065166186865, |
|
"grad_norm": 20.399002075195312, |
|
"learning_rate": 4.085872838241796e-07, |
|
"logits/chosen": -2.772639036178589, |
|
"logits/rejected": -2.7564244270324707, |
|
"logps/chosen": -427.06103515625, |
|
"logps/rejected": -415.52569580078125, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9161670804023743, |
|
"rewards/margins": 0.2716852128505707, |
|
"rewards/rejected": -1.1878522634506226, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.3559277676001047, |
|
"grad_norm": 18.165796279907227, |
|
"learning_rate": 4.06814691345098e-07, |
|
"logits/chosen": -2.824145793914795, |
|
"logits/rejected": -2.7942967414855957, |
|
"logps/chosen": -417.0282287597656, |
|
"logps/rejected": -399.99652099609375, |
|
"loss": 0.6027, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8873056173324585, |
|
"rewards/margins": 0.3002934157848358, |
|
"rewards/rejected": -1.1875989437103271, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.35854488353834074, |
|
"grad_norm": 17.33743667602539, |
|
"learning_rate": 4.0502900488441707e-07, |
|
"logits/chosen": -2.8374624252319336, |
|
"logits/rejected": -2.851886749267578, |
|
"logps/chosen": -432.22711181640625, |
|
"logps/rejected": -437.4540100097656, |
|
"loss": 0.6232, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8847773671150208, |
|
"rewards/margins": 0.2480648010969162, |
|
"rewards/rejected": -1.1328423023223877, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.3611619994765768, |
|
"grad_norm": 25.823244094848633, |
|
"learning_rate": 4.032303735464422e-07, |
|
"logits/chosen": -2.937242269515991, |
|
"logits/rejected": -2.8683810234069824, |
|
"logps/chosen": -434.61297607421875, |
|
"logps/rejected": -418.332275390625, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.79572993516922, |
|
"rewards/margins": 0.3832002282142639, |
|
"rewards/rejected": -1.1789300441741943, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.3637791154148129, |
|
"grad_norm": 20.350576400756836, |
|
"learning_rate": 4.014189475163726e-07, |
|
"logits/chosen": -2.8276124000549316, |
|
"logits/rejected": -2.8166391849517822, |
|
"logps/chosen": -393.64813232421875, |
|
"logps/rejected": -399.48638916015625, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.6952024698257446, |
|
"rewards/margins": 0.34315189719200134, |
|
"rewards/rejected": -1.0383542776107788, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"grad_norm": 26.685108184814453, |
|
"learning_rate": 3.995948780477605e-07, |
|
"logits/chosen": -2.871060371398926, |
|
"logits/rejected": -2.8492379188537598, |
|
"logps/chosen": -421.88494873046875, |
|
"logps/rejected": -410.37371826171875, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8149229884147644, |
|
"rewards/margins": 0.3252275884151459, |
|
"rewards/rejected": -1.1401506662368774, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.36639623135304894, |
|
"eval_logits/chosen": -2.828916311264038, |
|
"eval_logits/rejected": -2.8083691596984863, |
|
"eval_logps/chosen": -422.09027099609375, |
|
"eval_logps/rejected": -414.0766296386719, |
|
"eval_loss": 0.6064282655715942, |
|
"eval_rewards/accuracies": 0.6819999814033508, |
|
"eval_rewards/chosen": -0.8587133288383484, |
|
"eval_rewards/margins": 0.31100472807884216, |
|
"eval_rewards/rejected": -1.1697180271148682, |
|
"eval_runtime": 305.6064, |
|
"eval_samples_per_second": 6.544, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.369013347291285, |
|
"grad_norm": 23.994709014892578, |
|
"learning_rate": 3.977583174498816e-07, |
|
"logits/chosen": -2.850383758544922, |
|
"logits/rejected": -2.8548877239227295, |
|
"logps/chosen": -428.41357421875, |
|
"logps/rejected": -422.70208740234375, |
|
"loss": 0.5758, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.8807367086410522, |
|
"rewards/margins": 0.3707871437072754, |
|
"rewards/rejected": -1.2515239715576172, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.3716304632295211, |
|
"grad_norm": 20.96696662902832, |
|
"learning_rate": 3.9590941907501717e-07, |
|
"logits/chosen": -2.8528692722320557, |
|
"logits/rejected": -2.826349973678589, |
|
"logps/chosen": -452.9150390625, |
|
"logps/rejected": -441.3421325683594, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.8741127252578735, |
|
"rewards/margins": 0.3615128993988037, |
|
"rewards/rejected": -1.2356255054473877, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.37424757916775714, |
|
"grad_norm": 24.699710845947266, |
|
"learning_rate": 3.9404833730564974e-07, |
|
"logits/chosen": -2.749164342880249, |
|
"logits/rejected": -2.7452807426452637, |
|
"logps/chosen": -406.6269226074219, |
|
"logps/rejected": -410.21746826171875, |
|
"loss": 0.6114, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8366050720214844, |
|
"rewards/margins": 0.314082533121109, |
|
"rewards/rejected": -1.150687575340271, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.3768646951059932, |
|
"grad_norm": 20.18442726135254, |
|
"learning_rate": 3.9217522754157117e-07, |
|
"logits/chosen": -2.824708938598633, |
|
"logits/rejected": -2.822957992553711, |
|
"logps/chosen": -413.8306579589844, |
|
"logps/rejected": -416.0909118652344, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9702291488647461, |
|
"rewards/margins": 0.4000950753688812, |
|
"rewards/rejected": -1.3703243732452393, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.37948181104422923, |
|
"grad_norm": 24.324777603149414, |
|
"learning_rate": 3.9029024618690785e-07, |
|
"logits/chosen": -2.8401103019714355, |
|
"logits/rejected": -2.802492380142212, |
|
"logps/chosen": -404.65887451171875, |
|
"logps/rejected": -410.8133239746094, |
|
"loss": 0.5906, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.9892793893814087, |
|
"rewards/margins": 0.39633244276046753, |
|
"rewards/rejected": -1.3856117725372314, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.38209892698246534, |
|
"grad_norm": 25.477262496948242, |
|
"learning_rate": 3.883935506370605e-07, |
|
"logits/chosen": -2.787506341934204, |
|
"logits/rejected": -2.782578229904175, |
|
"logps/chosen": -414.322998046875, |
|
"logps/rejected": -399.5470886230469, |
|
"loss": 0.6312, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9510722160339355, |
|
"rewards/margins": 0.2990874648094177, |
|
"rewards/rejected": -1.2501596212387085, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.3847160429207014, |
|
"grad_norm": 15.537137031555176, |
|
"learning_rate": 3.864852992655616e-07, |
|
"logits/chosen": -2.7832908630371094, |
|
"logits/rejected": -2.7727818489074707, |
|
"logps/chosen": -415.6470642089844, |
|
"logps/rejected": -435.2330627441406, |
|
"loss": 0.551, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9724925756454468, |
|
"rewards/margins": 0.455828994512558, |
|
"rewards/rejected": -1.4283217191696167, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.38733315885893743, |
|
"grad_norm": 20.25286865234375, |
|
"learning_rate": 3.845656514108515e-07, |
|
"logits/chosen": -2.8250439167022705, |
|
"logits/rejected": -2.7870283126831055, |
|
"logps/chosen": -451.704345703125, |
|
"logps/rejected": -409.6332092285156, |
|
"loss": 0.6335, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.237602949142456, |
|
"rewards/margins": 0.28984588384628296, |
|
"rewards/rejected": -1.5274488925933838, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.38995027479717354, |
|
"grad_norm": 21.001708984375, |
|
"learning_rate": 3.8263476736297375e-07, |
|
"logits/chosen": -2.775477886199951, |
|
"logits/rejected": -2.741433620452881, |
|
"logps/chosen": -431.9996032714844, |
|
"logps/rejected": -435.52178955078125, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1075719594955444, |
|
"rewards/margins": 0.43814000487327576, |
|
"rewards/rejected": -1.5457121133804321, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"grad_norm": 29.425813674926758, |
|
"learning_rate": 3.8069280835019055e-07, |
|
"logits/chosen": -2.7563180923461914, |
|
"logits/rejected": -2.731633424758911, |
|
"logps/chosen": -448.45123291015625, |
|
"logps/rejected": -439.7250061035156, |
|
"loss": 0.592, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.986266016960144, |
|
"rewards/margins": 0.3872339129447937, |
|
"rewards/rejected": -1.3734999895095825, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.3925673907354096, |
|
"eval_logits/chosen": -2.7703075408935547, |
|
"eval_logits/rejected": -2.7455074787139893, |
|
"eval_logps/chosen": -433.11322021484375, |
|
"eval_logps/rejected": -428.9928894042969, |
|
"eval_loss": 0.6027323007583618, |
|
"eval_rewards/accuracies": 0.671500027179718, |
|
"eval_rewards/chosen": -0.9689425230026245, |
|
"eval_rewards/margins": 0.34993812441825867, |
|
"eval_rewards/rejected": -1.318880558013916, |
|
"eval_runtime": 305.4978, |
|
"eval_samples_per_second": 6.547, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.39518450667364563, |
|
"grad_norm": 20.41739845275879, |
|
"learning_rate": 3.7873993652552073e-07, |
|
"logits/chosen": -2.7873950004577637, |
|
"logits/rejected": -2.767686128616333, |
|
"logps/chosen": -394.41082763671875, |
|
"logps/rejected": -397.44976806640625, |
|
"loss": 0.654, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9665681719779968, |
|
"rewards/margins": 0.24985328316688538, |
|
"rewards/rejected": -1.2164217233657837, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.39780162261188173, |
|
"grad_norm": 17.706754684448242, |
|
"learning_rate": 3.767763149531995e-07, |
|
"logits/chosen": -2.817774772644043, |
|
"logits/rejected": -2.7983908653259277, |
|
"logps/chosen": -410.2703552246094, |
|
"logps/rejected": -415.4830017089844, |
|
"loss": 0.578, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7426007986068726, |
|
"rewards/margins": 0.37523287534713745, |
|
"rewards/rejected": -1.1178338527679443, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.4004187385501178, |
|
"grad_norm": 19.806367874145508, |
|
"learning_rate": 3.7480210759506326e-07, |
|
"logits/chosen": -2.7944726943969727, |
|
"logits/rejected": -2.7969970703125, |
|
"logps/chosen": -424.00128173828125, |
|
"logps/rejected": -411.03619384765625, |
|
"loss": 0.6494, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7284184098243713, |
|
"rewards/margins": 0.221491739153862, |
|
"rewards/rejected": -0.9499101638793945, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.40303585448835383, |
|
"grad_norm": 26.830059051513672, |
|
"learning_rate": 3.728174792968582e-07, |
|
"logits/chosen": -2.749136209487915, |
|
"logits/rejected": -2.7285478115081787, |
|
"logps/chosen": -378.54522705078125, |
|
"logps/rejected": -378.9236145019531, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7706597447395325, |
|
"rewards/margins": 0.29169803857803345, |
|
"rewards/rejected": -1.062357783317566, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.4056529704265899, |
|
"grad_norm": 19.88861656188965, |
|
"learning_rate": 3.70822595774476e-07, |
|
"logits/chosen": -2.802050828933716, |
|
"logits/rejected": -2.8034234046936035, |
|
"logps/chosen": -424.07757568359375, |
|
"logps/rejected": -420.0818786621094, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7196288704872131, |
|
"rewards/margins": 0.37008053064346313, |
|
"rewards/rejected": -1.0897094011306763, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.408270086364826, |
|
"grad_norm": 21.920900344848633, |
|
"learning_rate": 3.688176236001168e-07, |
|
"logits/chosen": -2.808371067047119, |
|
"logits/rejected": -2.7770209312438965, |
|
"logps/chosen": -437.070556640625, |
|
"logps/rejected": -407.98260498046875, |
|
"loss": 0.599, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.73606276512146, |
|
"rewards/margins": 0.3621772229671478, |
|
"rewards/rejected": -1.0982400178909302, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.410887202303062, |
|
"grad_norm": 21.512451171875, |
|
"learning_rate": 3.6680273018838016e-07, |
|
"logits/chosen": -2.7860348224639893, |
|
"logits/rejected": -2.765151262283325, |
|
"logps/chosen": -402.9967346191406, |
|
"logps/rejected": -403.93328857421875, |
|
"loss": 0.5804, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7530256509780884, |
|
"rewards/margins": 0.39612632989883423, |
|
"rewards/rejected": -1.1491520404815674, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.4135043182412981, |
|
"grad_norm": 27.112638473510742, |
|
"learning_rate": 3.6477808378228596e-07, |
|
"logits/chosen": -2.7512621879577637, |
|
"logits/rejected": -2.7771029472351074, |
|
"logps/chosen": -403.02099609375, |
|
"logps/rejected": -453.424072265625, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7836870551109314, |
|
"rewards/margins": 0.3679850399494171, |
|
"rewards/rejected": -1.151672124862671, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.4161214341795342, |
|
"grad_norm": 25.062524795532227, |
|
"learning_rate": 3.6274385343922674e-07, |
|
"logits/chosen": -2.832534074783325, |
|
"logits/rejected": -2.849515438079834, |
|
"logps/chosen": -390.73565673828125, |
|
"logps/rejected": -424.64654541015625, |
|
"loss": 0.5968, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.8553959131240845, |
|
"rewards/margins": 0.34458768367767334, |
|
"rewards/rejected": -1.1999835968017578, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"grad_norm": 21.186635971069336, |
|
"learning_rate": 3.6070020901685057e-07, |
|
"logits/chosen": -2.724576234817505, |
|
"logits/rejected": -2.726635694503784, |
|
"logps/chosen": -425.3138122558594, |
|
"logps/rejected": -408.3916015625, |
|
"loss": 0.6353, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.8639251589775085, |
|
"rewards/margins": 0.26024505496025085, |
|
"rewards/rejected": -1.1241703033447266, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4187385501177702, |
|
"eval_logits/chosen": -2.7245428562164307, |
|
"eval_logits/rejected": -2.6972126960754395, |
|
"eval_logps/chosen": -432.62255859375, |
|
"eval_logps/rejected": -429.33135986328125, |
|
"eval_loss": 0.6051159501075745, |
|
"eval_rewards/accuracies": 0.6744999885559082, |
|
"eval_rewards/chosen": -0.9640358090400696, |
|
"eval_rewards/margins": 0.3582296371459961, |
|
"eval_rewards/rejected": -1.322265386581421, |
|
"eval_runtime": 305.6135, |
|
"eval_samples_per_second": 6.544, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.4213556660560063, |
|
"grad_norm": 19.41357421875, |
|
"learning_rate": 3.5864732115887863e-07, |
|
"logits/chosen": -2.790837049484253, |
|
"logits/rejected": -2.7912559509277344, |
|
"logps/chosen": -404.7127685546875, |
|
"logps/rejected": -439.0284729003906, |
|
"loss": 0.5745, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9212363958358765, |
|
"rewards/margins": 0.4179501533508301, |
|
"rewards/rejected": -1.339186668395996, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.4239727819942423, |
|
"grad_norm": 31.039003372192383, |
|
"learning_rate": 3.565853612808562e-07, |
|
"logits/chosen": -2.813098669052124, |
|
"logits/rejected": -2.756390333175659, |
|
"logps/chosen": -455.2381286621094, |
|
"logps/rejected": -444.98101806640625, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1717721223831177, |
|
"rewards/margins": 0.3537500500679016, |
|
"rewards/rejected": -1.525522232055664, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.4265898979324784, |
|
"grad_norm": 20.93377685546875, |
|
"learning_rate": 3.5451450155583984e-07, |
|
"logits/chosen": -2.663109302520752, |
|
"logits/rejected": -2.7095718383789062, |
|
"logps/chosen": -419.74652099609375, |
|
"logps/rejected": -429.96502685546875, |
|
"loss": 0.6041, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2498713731765747, |
|
"rewards/margins": 0.4084538519382477, |
|
"rewards/rejected": -1.6583251953125, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.42920701387071447, |
|
"grad_norm": 21.383033752441406, |
|
"learning_rate": 3.5243491490002055e-07, |
|
"logits/chosen": -2.721489191055298, |
|
"logits/rejected": -2.705233097076416, |
|
"logps/chosen": -456.5779724121094, |
|
"logps/rejected": -452.54620361328125, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2452127933502197, |
|
"rewards/margins": 0.2219144105911255, |
|
"rewards/rejected": -1.4671272039413452, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.4318241298089505, |
|
"grad_norm": 20.577220916748047, |
|
"learning_rate": 3.503467749582857e-07, |
|
"logits/chosen": -2.7935385704040527, |
|
"logits/rejected": -2.749878168106079, |
|
"logps/chosen": -415.6224060058594, |
|
"logps/rejected": -389.2660827636719, |
|
"loss": 0.6546, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9616597890853882, |
|
"rewards/margins": 0.2633201479911804, |
|
"rewards/rejected": -1.2249799966812134, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.4344412457471866, |
|
"grad_norm": 27.85550308227539, |
|
"learning_rate": 3.482502560897194e-07, |
|
"logits/chosen": -2.722708225250244, |
|
"logits/rejected": -2.7652525901794434, |
|
"logps/chosen": -375.1950378417969, |
|
"logps/rejected": -402.11474609375, |
|
"loss": 0.6213, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.940973162651062, |
|
"rewards/margins": 0.28709056973457336, |
|
"rewards/rejected": -1.2280638217926025, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.43705836168542267, |
|
"grad_norm": 19.033451080322266, |
|
"learning_rate": 3.4614553335304403e-07, |
|
"logits/chosen": -2.797665596008301, |
|
"logits/rejected": -2.7312228679656982, |
|
"logps/chosen": -454.24371337890625, |
|
"logps/rejected": -424.7903747558594, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0169007778167725, |
|
"rewards/margins": 0.3303568661212921, |
|
"rewards/rejected": -1.3472576141357422, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.4396754776236587, |
|
"grad_norm": 19.925254821777344, |
|
"learning_rate": 3.440327824920022e-07, |
|
"logits/chosen": -2.8008456230163574, |
|
"logits/rejected": -2.744900941848755, |
|
"logps/chosen": -449.263427734375, |
|
"logps/rejected": -425.96429443359375, |
|
"loss": 0.5824, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8703651428222656, |
|
"rewards/margins": 0.4040173590183258, |
|
"rewards/rejected": -1.2743823528289795, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.44229259356189476, |
|
"grad_norm": 19.3896541595459, |
|
"learning_rate": 3.4191217992068287e-07, |
|
"logits/chosen": -2.8239524364471436, |
|
"logits/rejected": -2.7781131267547607, |
|
"logps/chosen": -445.6703186035156, |
|
"logps/rejected": -414.863525390625, |
|
"loss": 0.6067, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.9370183944702148, |
|
"rewards/margins": 0.33230060338974, |
|
"rewards/rejected": -1.26931893825531, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"grad_norm": 25.677919387817383, |
|
"learning_rate": 3.3978390270879056e-07, |
|
"logits/chosen": -2.7083306312561035, |
|
"logits/rejected": -2.722886562347412, |
|
"logps/chosen": -384.851318359375, |
|
"logps/rejected": -393.93927001953125, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -1.1110846996307373, |
|
"rewards/margins": 0.19145555794239044, |
|
"rewards/rejected": -1.3025401830673218, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.44490970950013087, |
|
"eval_logits/chosen": -2.7304868698120117, |
|
"eval_logits/rejected": -2.702120780944824, |
|
"eval_logps/chosen": -435.1521301269531, |
|
"eval_logps/rejected": -429.3145446777344, |
|
"eval_loss": 0.6016219854354858, |
|
"eval_rewards/accuracies": 0.6765000224113464, |
|
"eval_rewards/chosen": -0.9893313050270081, |
|
"eval_rewards/margins": 0.3327656388282776, |
|
"eval_rewards/rejected": -1.3220969438552856, |
|
"eval_runtime": 305.5368, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.4475268254383669, |
|
"grad_norm": 22.6643123626709, |
|
"learning_rate": 3.376481285668599e-07, |
|
"logits/chosen": -2.784320831298828, |
|
"logits/rejected": -2.812058925628662, |
|
"logps/chosen": -382.7949523925781, |
|
"logps/rejected": -412.650390625, |
|
"loss": 0.632, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.9195866584777832, |
|
"rewards/margins": 0.27537328004837036, |
|
"rewards/rejected": -1.1949598789215088, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.45014394137660296, |
|
"grad_norm": 25.10662269592285, |
|
"learning_rate": 3.355050358314172e-07, |
|
"logits/chosen": -2.8318912982940674, |
|
"logits/rejected": -2.8115198612213135, |
|
"logps/chosen": -418.42559814453125, |
|
"logps/rejected": -420.23126220703125, |
|
"loss": 0.5826, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7352002859115601, |
|
"rewards/margins": 0.3559706211090088, |
|
"rewards/rejected": -1.0911709070205688, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.45276105731483907, |
|
"grad_norm": 24.134742736816406, |
|
"learning_rate": 3.33354803450089e-07, |
|
"logits/chosen": -2.738598585128784, |
|
"logits/rejected": -2.707373857498169, |
|
"logps/chosen": -400.3019714355469, |
|
"logps/rejected": -398.3058166503906, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.751221776008606, |
|
"rewards/margins": 0.33383291959762573, |
|
"rewards/rejected": -1.085054636001587, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.4553781732530751, |
|
"grad_norm": 19.136178970336914, |
|
"learning_rate": 3.311976109666605e-07, |
|
"logits/chosen": -2.7323246002197266, |
|
"logits/rejected": -2.700840473175049, |
|
"logps/chosen": -419.31512451171875, |
|
"logps/rejected": -397.88238525390625, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7072068452835083, |
|
"rewards/margins": 0.31471288204193115, |
|
"rewards/rejected": -1.0219197273254395, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.45799528919131116, |
|
"grad_norm": 20.46944236755371, |
|
"learning_rate": 3.2903363850608317e-07, |
|
"logits/chosen": -2.7930941581726074, |
|
"logits/rejected": -2.73994517326355, |
|
"logps/chosen": -430.36114501953125, |
|
"logps/rejected": -423.3164978027344, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0622642040252686, |
|
"rewards/margins": 0.35906052589416504, |
|
"rewards/rejected": -1.421324610710144, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.46061240512954726, |
|
"grad_norm": 20.967994689941406, |
|
"learning_rate": 3.2686306675943477e-07, |
|
"logits/chosen": -2.7039730548858643, |
|
"logits/rejected": -2.709929943084717, |
|
"logps/chosen": -429.5506286621094, |
|
"logps/rejected": -427.92413330078125, |
|
"loss": 0.6031, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0673789978027344, |
|
"rewards/margins": 0.34702402353286743, |
|
"rewards/rejected": -1.414402961730957, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.4632295210677833, |
|
"grad_norm": 20.740760803222656, |
|
"learning_rate": 3.2468607696883145e-07, |
|
"logits/chosen": -2.708698034286499, |
|
"logits/rejected": -2.7119338512420654, |
|
"logps/chosen": -437.08807373046875, |
|
"logps/rejected": -471.3455505371094, |
|
"loss": 0.5756, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1100339889526367, |
|
"rewards/margins": 0.45587754249572754, |
|
"rewards/rejected": -1.5659115314483643, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.46584663700601936, |
|
"grad_norm": 22.729509353637695, |
|
"learning_rate": 3.2250285091229435e-07, |
|
"logits/chosen": -2.7388596534729004, |
|
"logits/rejected": -2.694728374481201, |
|
"logps/chosen": -416.31463623046875, |
|
"logps/rejected": -420.25653076171875, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.0822761058807373, |
|
"rewards/margins": 0.31783348321914673, |
|
"rewards/rejected": -1.4001096487045288, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.4684637529442554, |
|
"grad_norm": 22.45555305480957, |
|
"learning_rate": 3.2031357088857083e-07, |
|
"logits/chosen": -2.7370693683624268, |
|
"logits/rejected": -2.708481550216675, |
|
"logps/chosen": -462.5494689941406, |
|
"logps/rejected": -482.8470153808594, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1477806568145752, |
|
"rewards/margins": 0.3766568601131439, |
|
"rewards/rejected": -1.524437665939331, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"grad_norm": 26.537885665893555, |
|
"learning_rate": 3.1811841970191267e-07, |
|
"logits/chosen": -2.5913147926330566, |
|
"logits/rejected": -2.5993223190307617, |
|
"logps/chosen": -404.4351501464844, |
|
"logps/rejected": -467.8827209472656, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0686676502227783, |
|
"rewards/margins": 0.5164337158203125, |
|
"rewards/rejected": -1.5851013660430908, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.4710808688824915, |
|
"eval_logits/chosen": -2.6491506099700928, |
|
"eval_logits/rejected": -2.615879774093628, |
|
"eval_logps/chosen": -436.5640869140625, |
|
"eval_logps/rejected": -434.75897216796875, |
|
"eval_loss": 0.6023004055023193, |
|
"eval_rewards/accuracies": 0.6790000200271606, |
|
"eval_rewards/chosen": -1.0034514665603638, |
|
"eval_rewards/margins": 0.3730900287628174, |
|
"eval_rewards/rejected": -1.3765413761138916, |
|
"eval_runtime": 305.4204, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.819, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.47369798482072756, |
|
"grad_norm": 17.421649932861328, |
|
"learning_rate": 3.1591758064681257e-07, |
|
"logits/chosen": -2.6142051219940186, |
|
"logits/rejected": -2.555877447128296, |
|
"logps/chosen": -421.052490234375, |
|
"logps/rejected": -406.779541015625, |
|
"loss": 0.5928, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9783207774162292, |
|
"rewards/margins": 0.4080726206302643, |
|
"rewards/rejected": -1.3863933086395264, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.4763151007589636, |
|
"grad_norm": 24.198755264282227, |
|
"learning_rate": 3.13711237492698e-07, |
|
"logits/chosen": -2.685159921646118, |
|
"logits/rejected": -2.6774396896362305, |
|
"logps/chosen": -460.9088439941406, |
|
"logps/rejected": -460.2481994628906, |
|
"loss": 0.6401, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9882858395576477, |
|
"rewards/margins": 0.3006265163421631, |
|
"rewards/rejected": -1.2889124155044556, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.4789322166971997, |
|
"grad_norm": 22.546266555786133, |
|
"learning_rate": 3.1149957446858767e-07, |
|
"logits/chosen": -2.7318637371063232, |
|
"logits/rejected": -2.746227264404297, |
|
"logps/chosen": -389.7530212402344, |
|
"logps/rejected": -395.2696533203125, |
|
"loss": 0.6069, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7182685136795044, |
|
"rewards/margins": 0.309965580701828, |
|
"rewards/rejected": -1.0282341241836548, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.48154933263543576, |
|
"grad_norm": 18.50275993347168, |
|
"learning_rate": 3.0928277624770736e-07, |
|
"logits/chosen": -2.7960715293884277, |
|
"logits/rejected": -2.758817195892334, |
|
"logps/chosen": -428.894775390625, |
|
"logps/rejected": -424.63494873046875, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6762595176696777, |
|
"rewards/margins": 0.4380512237548828, |
|
"rewards/rejected": -1.1143107414245605, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.4841664485736718, |
|
"grad_norm": 16.639318466186523, |
|
"learning_rate": 3.0706102793207073e-07, |
|
"logits/chosen": -2.7766544818878174, |
|
"logits/rejected": -2.7374072074890137, |
|
"logps/chosen": -429.73052978515625, |
|
"logps/rejected": -426.40850830078125, |
|
"loss": 0.5717, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.729512095451355, |
|
"rewards/margins": 0.40905341506004333, |
|
"rewards/rejected": -1.1385654211044312, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.48678356451190785, |
|
"grad_norm": 18.389541625976562, |
|
"learning_rate": 3.048345150370226e-07, |
|
"logits/chosen": -2.7712199687957764, |
|
"logits/rejected": -2.7354605197906494, |
|
"logps/chosen": -465.0520935058594, |
|
"logps/rejected": -462.63519287109375, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9968741536140442, |
|
"rewards/margins": 0.40299710631370544, |
|
"rewards/rejected": -1.3998713493347168, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.48940068045014395, |
|
"grad_norm": 21.121103286743164, |
|
"learning_rate": 3.0260342347574913e-07, |
|
"logits/chosen": -2.700634717941284, |
|
"logits/rejected": -2.6599361896514893, |
|
"logps/chosen": -451.46331787109375, |
|
"logps/rejected": -454.69427490234375, |
|
"loss": 0.5382, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.978916347026825, |
|
"rewards/margins": 0.47690868377685547, |
|
"rewards/rejected": -1.4558249711990356, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.49201779638838, |
|
"grad_norm": 24.27654266357422, |
|
"learning_rate": 3.0036793954375357e-07, |
|
"logits/chosen": -2.7394368648529053, |
|
"logits/rejected": -2.6990807056427, |
|
"logps/chosen": -442.2903747558594, |
|
"logps/rejected": -426.3751525878906, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9537175893783569, |
|
"rewards/margins": 0.4698936939239502, |
|
"rewards/rejected": -1.4236112833023071, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.49463491232661605, |
|
"grad_norm": 24.052539825439453, |
|
"learning_rate": 2.9812824990330085e-07, |
|
"logits/chosen": -2.7231922149658203, |
|
"logits/rejected": -2.696277379989624, |
|
"logps/chosen": -431.68585205078125, |
|
"logps/rejected": -429.3919982910156, |
|
"loss": 0.6315, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.8916054964065552, |
|
"rewards/margins": 0.33528465032577515, |
|
"rewards/rejected": -1.2268900871276855, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"grad_norm": 19.382165908813477, |
|
"learning_rate": 2.958845415678316e-07, |
|
"logits/chosen": -2.703700542449951, |
|
"logits/rejected": -2.666050672531128, |
|
"logps/chosen": -440.453125, |
|
"logps/rejected": -444.79034423828125, |
|
"loss": 0.5877, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.8149045705795288, |
|
"rewards/margins": 0.42137575149536133, |
|
"rewards/rejected": -1.2362802028656006, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.49725202826485215, |
|
"eval_logits/chosen": -2.694143056869507, |
|
"eval_logits/rejected": -2.6620967388153076, |
|
"eval_logps/chosen": -417.5872497558594, |
|
"eval_logps/rejected": -415.6308288574219, |
|
"eval_loss": 0.5975241661071777, |
|
"eval_rewards/accuracies": 0.6834999918937683, |
|
"eval_rewards/chosen": -0.8136825561523438, |
|
"eval_rewards/margins": 0.37157776951789856, |
|
"eval_rewards/rejected": -1.1852604150772095, |
|
"eval_runtime": 305.6021, |
|
"eval_samples_per_second": 6.544, |
|
"eval_steps_per_second": 0.818, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.4998691442030882, |
|
"grad_norm": 17.323326110839844, |
|
"learning_rate": 2.936370018863459e-07, |
|
"logits/chosen": -2.7364118099212646, |
|
"logits/rejected": -2.7139904499053955, |
|
"logps/chosen": -417.9366760253906, |
|
"logps/rejected": -403.66143798828125, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8763308525085449, |
|
"rewards/margins": 0.36916738748550415, |
|
"rewards/rejected": -1.2454981803894043, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.5024862601413242, |
|
"grad_norm": 19.605926513671875, |
|
"learning_rate": 2.913858185277605e-07, |
|
"logits/chosen": -2.7196900844573975, |
|
"logits/rejected": -2.6851718425750732, |
|
"logps/chosen": -425.770263671875, |
|
"logps/rejected": -433.83673095703125, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.8689553141593933, |
|
"rewards/margins": 0.44626492261886597, |
|
"rewards/rejected": -1.3152204751968384, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.5051033760795604, |
|
"grad_norm": 23.041301727294922, |
|
"learning_rate": 2.89131179465238e-07, |
|
"logits/chosen": -2.7089123725891113, |
|
"logits/rejected": -2.6577157974243164, |
|
"logps/chosen": -421.91558837890625, |
|
"logps/rejected": -414.3662109375, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8832708597183228, |
|
"rewards/margins": 0.5182112455368042, |
|
"rewards/rejected": -1.4014819860458374, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.5077204920177963, |
|
"grad_norm": 22.370925903320312, |
|
"learning_rate": 2.8687327296049125e-07, |
|
"logits/chosen": -2.6943976879119873, |
|
"logits/rejected": -2.670966625213623, |
|
"logps/chosen": -417.71807861328125, |
|
"logps/rejected": -440.22589111328125, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8600457906723022, |
|
"rewards/margins": 0.47562676668167114, |
|
"rewards/rejected": -1.3356726169586182, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.5103376079560324, |
|
"grad_norm": 18.212282180786133, |
|
"learning_rate": 2.846122875480637e-07, |
|
"logits/chosen": -2.696530818939209, |
|
"logits/rejected": -2.638589382171631, |
|
"logps/chosen": -437.90167236328125, |
|
"logps/rejected": -430.6405334472656, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.8469659686088562, |
|
"rewards/margins": 0.425870418548584, |
|
"rewards/rejected": -1.272836446762085, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.5129547238942685, |
|
"grad_norm": 21.467227935791016, |
|
"learning_rate": 2.8234841201958647e-07, |
|
"logits/chosen": -2.7403194904327393, |
|
"logits/rejected": -2.6844117641448975, |
|
"logps/chosen": -450.5631408691406, |
|
"logps/rejected": -435.8984375, |
|
"loss": 0.5596, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.8618534207344055, |
|
"rewards/margins": 0.47362977266311646, |
|
"rewards/rejected": -1.335483193397522, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.5155718398325045, |
|
"grad_norm": 28.714305877685547, |
|
"learning_rate": 2.800818354080148e-07, |
|
"logits/chosen": -2.6684775352478027, |
|
"logits/rejected": -2.6224045753479004, |
|
"logps/chosen": -444.83251953125, |
|
"logps/rejected": -412.0975036621094, |
|
"loss": 0.619, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.9538711309432983, |
|
"rewards/margins": 0.33846694231033325, |
|
"rewards/rejected": -1.2923381328582764, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.5181889557707406, |
|
"grad_norm": 24.992568969726562, |
|
"learning_rate": 2.778127469718435e-07, |
|
"logits/chosen": -2.6164069175720215, |
|
"logits/rejected": -2.6504337787628174, |
|
"logps/chosen": -390.96527099609375, |
|
"logps/rejected": -431.69854736328125, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9115864038467407, |
|
"rewards/margins": 0.32063305377960205, |
|
"rewards/rejected": -1.2322193384170532, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.5208060717089767, |
|
"grad_norm": 19.11081886291504, |
|
"learning_rate": 2.755413361793039e-07, |
|
"logits/chosen": -2.6722495555877686, |
|
"logits/rejected": -2.6163339614868164, |
|
"logps/chosen": -403.4658203125, |
|
"logps/rejected": -406.2340393066406, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.817135214805603, |
|
"rewards/margins": 0.4171879291534424, |
|
"rewards/rejected": -1.2343231439590454, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"grad_norm": 24.406513214111328, |
|
"learning_rate": 2.7326779269254356e-07, |
|
"logits/chosen": -2.7332730293273926, |
|
"logits/rejected": -2.6852307319641113, |
|
"logps/chosen": -446.953857421875, |
|
"logps/rejected": -410.27093505859375, |
|
"loss": 0.5827, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.7922626733779907, |
|
"rewards/margins": 0.43351325392723083, |
|
"rewards/rejected": -1.225775957107544, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5234231876472127, |
|
"eval_logits/chosen": -2.6396472454071045, |
|
"eval_logits/rejected": -2.604276418685913, |
|
"eval_logps/chosen": -423.45745849609375, |
|
"eval_logps/rejected": -422.7220764160156, |
|
"eval_loss": 0.5934838652610779, |
|
"eval_rewards/accuracies": 0.6809999942779541, |
|
"eval_rewards/chosen": -0.8723848462104797, |
|
"eval_rewards/margins": 0.383787602186203, |
|
"eval_rewards/rejected": -1.2561724185943604, |
|
"eval_runtime": 305.4532, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5260403035854488, |
|
"grad_norm": 21.45716094970703, |
|
"learning_rate": 2.709923063517895e-07, |
|
"logits/chosen": -2.673267126083374, |
|
"logits/rejected": -2.667255401611328, |
|
"logps/chosen": -410.90777587890625, |
|
"logps/rejected": -433.248046875, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.8045026659965515, |
|
"rewards/margins": 0.4072667062282562, |
|
"rewards/rejected": -1.211769461631775, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.528657419523685, |
|
"grad_norm": 23.03792953491211, |
|
"learning_rate": 2.68715067159496e-07, |
|
"logits/chosen": -2.7115061283111572, |
|
"logits/rejected": -2.6757900714874268, |
|
"logps/chosen": -402.1265563964844, |
|
"logps/rejected": -395.89752197265625, |
|
"loss": 0.5894, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7784110903739929, |
|
"rewards/margins": 0.35269591212272644, |
|
"rewards/rejected": -1.131106972694397, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.5312745354619209, |
|
"grad_norm": 22.643049240112305, |
|
"learning_rate": 2.664362652644806e-07, |
|
"logits/chosen": -2.7354533672332764, |
|
"logits/rejected": -2.698335647583008, |
|
"logps/chosen": -465.36376953125, |
|
"logps/rejected": -446.8394470214844, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.8965269923210144, |
|
"rewards/margins": 0.5018103718757629, |
|
"rewards/rejected": -1.3983373641967773, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.533891651400157, |
|
"grad_norm": 18.090749740600586, |
|
"learning_rate": 2.6415609094604555e-07, |
|
"logits/chosen": -2.6659553050994873, |
|
"logits/rejected": -2.690216541290283, |
|
"logps/chosen": -451.0194396972656, |
|
"logps/rejected": -454.2710876464844, |
|
"loss": 0.589, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -1.010286569595337, |
|
"rewards/margins": 0.4080902636051178, |
|
"rewards/rejected": -1.4183766841888428, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.5365087673383931, |
|
"grad_norm": 19.967069625854492, |
|
"learning_rate": 2.618747345980904e-07, |
|
"logits/chosen": -2.6821203231811523, |
|
"logits/rejected": -2.6286585330963135, |
|
"logps/chosen": -429.80517578125, |
|
"logps/rejected": -390.1981506347656, |
|
"loss": 0.6111, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1355177164077759, |
|
"rewards/margins": 0.328762948513031, |
|
"rewards/rejected": -1.4642808437347412, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.5391258832766291, |
|
"grad_norm": 20.209062576293945, |
|
"learning_rate": 2.595923867132136e-07, |
|
"logits/chosen": -2.7044026851654053, |
|
"logits/rejected": -2.679009437561035, |
|
"logps/chosen": -469.7115173339844, |
|
"logps/rejected": -470.56231689453125, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1646978855133057, |
|
"rewards/margins": 0.41930079460144043, |
|
"rewards/rejected": -1.5839985609054565, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.5417429992148652, |
|
"grad_norm": 22.267589569091797, |
|
"learning_rate": 2.5730923786680667e-07, |
|
"logits/chosen": -2.638622522354126, |
|
"logits/rejected": -2.679886817932129, |
|
"logps/chosen": -434.09967041015625, |
|
"logps/rejected": -474.56787109375, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1707215309143066, |
|
"rewards/margins": 0.36304971575737, |
|
"rewards/rejected": -1.533771276473999, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.5443601151531012, |
|
"grad_norm": 27.767457962036133, |
|
"learning_rate": 2.5502547870114135e-07, |
|
"logits/chosen": -2.677556037902832, |
|
"logits/rejected": -2.6269454956054688, |
|
"logps/chosen": -443.53631591796875, |
|
"logps/rejected": -426.9546813964844, |
|
"loss": 0.6383, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1648094654083252, |
|
"rewards/margins": 0.3214976489543915, |
|
"rewards/rejected": -1.486307144165039, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.5469772310913373, |
|
"grad_norm": 22.076040267944336, |
|
"learning_rate": 2.527412999094506e-07, |
|
"logits/chosen": -2.646812677383423, |
|
"logits/rejected": -2.620919704437256, |
|
"logps/chosen": -479.4862365722656, |
|
"logps/rejected": -492.4109802246094, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0661259889602661, |
|
"rewards/margins": 0.4469536244869232, |
|
"rewards/rejected": -1.5130794048309326, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"grad_norm": 27.053951263427734, |
|
"learning_rate": 2.5045689222000636e-07, |
|
"logits/chosen": -2.636404275894165, |
|
"logits/rejected": -2.619544506072998, |
|
"logps/chosen": -407.72406005859375, |
|
"logps/rejected": -409.077392578125, |
|
"loss": 0.6017, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.00985586643219, |
|
"rewards/margins": 0.3562536835670471, |
|
"rewards/rejected": -1.3661094903945923, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5495943470295734, |
|
"eval_logits/chosen": -2.643636703491211, |
|
"eval_logits/rejected": -2.6104650497436523, |
|
"eval_logps/chosen": -436.8658447265625, |
|
"eval_logps/rejected": -436.8172302246094, |
|
"eval_loss": 0.5910605192184448, |
|
"eval_rewards/accuracies": 0.690500020980835, |
|
"eval_rewards/chosen": -1.006468415260315, |
|
"eval_rewards/margins": 0.3906554877758026, |
|
"eval_rewards/rejected": -1.3971240520477295, |
|
"eval_runtime": 305.5499, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.5522114629678094, |
|
"grad_norm": 19.81754493713379, |
|
"learning_rate": 2.481724463801933e-07, |
|
"logits/chosen": -2.6853280067443848, |
|
"logits/rejected": -2.6354801654815674, |
|
"logps/chosen": -442.880615234375, |
|
"logps/rejected": -425.76275634765625, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9968615770339966, |
|
"rewards/margins": 0.4066940248012543, |
|
"rewards/rejected": -1.4035555124282837, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.5548285789060455, |
|
"grad_norm": 21.1542911529541, |
|
"learning_rate": 2.4588815314058154e-07, |
|
"logits/chosen": -2.678277015686035, |
|
"logits/rejected": -2.6839067935943604, |
|
"logps/chosen": -404.617431640625, |
|
"logps/rejected": -398.31866455078125, |
|
"loss": 0.5719, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.9024378061294556, |
|
"rewards/margins": 0.42626914381980896, |
|
"rewards/rejected": -1.328706979751587, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.5574456948442816, |
|
"grad_norm": 23.462890625, |
|
"learning_rate": 2.4360420323899917e-07, |
|
"logits/chosen": -2.6694867610931396, |
|
"logits/rejected": -2.6611833572387695, |
|
"logps/chosen": -429.3199768066406, |
|
"logps/rejected": -420.8585510253906, |
|
"loss": 0.5921, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.780637264251709, |
|
"rewards/margins": 0.40070563554763794, |
|
"rewards/rejected": -1.1813428401947021, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.5600628107825176, |
|
"grad_norm": 18.006694793701172, |
|
"learning_rate": 2.4132078738460583e-07, |
|
"logits/chosen": -2.7235093116760254, |
|
"logits/rejected": -2.6758511066436768, |
|
"logps/chosen": -414.63250732421875, |
|
"logps/rejected": -385.173828125, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.7920923233032227, |
|
"rewards/margins": 0.32466885447502136, |
|
"rewards/rejected": -1.1167610883712769, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.5626799267207537, |
|
"grad_norm": 28.11566162109375, |
|
"learning_rate": 2.390380962419682e-07, |
|
"logits/chosen": -2.672776460647583, |
|
"logits/rejected": -2.6661226749420166, |
|
"logps/chosen": -382.5970764160156, |
|
"logps/rejected": -362.99176025390625, |
|
"loss": 0.6076, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.7663475275039673, |
|
"rewards/margins": 0.33483588695526123, |
|
"rewards/rejected": -1.101183295249939, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.5652970426589898, |
|
"grad_norm": 21.116987228393555, |
|
"learning_rate": 2.3675632041513977e-07, |
|
"logits/chosen": -2.7305362224578857, |
|
"logits/rejected": -2.656404972076416, |
|
"logps/chosen": -436.4358825683594, |
|
"logps/rejected": -386.28509521484375, |
|
"loss": 0.5598, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.7153185606002808, |
|
"rewards/margins": 0.4554404318332672, |
|
"rewards/rejected": -1.1707589626312256, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.5679141585972258, |
|
"grad_norm": 25.329484939575195, |
|
"learning_rate": 2.344756504317453e-07, |
|
"logits/chosen": -2.698883533477783, |
|
"logits/rejected": -2.624823808670044, |
|
"logps/chosen": -420.55389404296875, |
|
"logps/rejected": -399.3967590332031, |
|
"loss": 0.5962, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9281074404716492, |
|
"rewards/margins": 0.3701552450656891, |
|
"rewards/rejected": -1.2982627153396606, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.5705312745354619, |
|
"grad_norm": 27.779399871826172, |
|
"learning_rate": 2.3219627672707237e-07, |
|
"logits/chosen": -2.671786069869995, |
|
"logits/rejected": -2.638526201248169, |
|
"logps/chosen": -414.17974853515625, |
|
"logps/rejected": -390.75604248046875, |
|
"loss": 0.5965, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9639150500297546, |
|
"rewards/margins": 0.3531908392906189, |
|
"rewards/rejected": -1.3171058893203735, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.573148390473698, |
|
"grad_norm": 19.64858055114746, |
|
"learning_rate": 2.2991838962816918e-07, |
|
"logits/chosen": -2.6252007484436035, |
|
"logits/rejected": -2.5690817832946777, |
|
"logps/chosen": -419.4808654785156, |
|
"logps/rejected": -446.12841796875, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9539289474487305, |
|
"rewards/margins": 0.398750901222229, |
|
"rewards/rejected": -1.3526798486709595, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"grad_norm": 24.863256454467773, |
|
"learning_rate": 2.2764217933795297e-07, |
|
"logits/chosen": -2.6451430320739746, |
|
"logits/rejected": -2.6056628227233887, |
|
"logps/chosen": -420.39849853515625, |
|
"logps/rejected": -424.5686950683594, |
|
"loss": 0.5539, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.854151725769043, |
|
"rewards/margins": 0.47093433141708374, |
|
"rewards/rejected": -1.325085997581482, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.575765506411934, |
|
"eval_logits/chosen": -2.6075503826141357, |
|
"eval_logits/rejected": -2.572392463684082, |
|
"eval_logps/chosen": -426.81951904296875, |
|
"eval_logps/rejected": -426.5499267578125, |
|
"eval_loss": 0.5919502377510071, |
|
"eval_rewards/accuracies": 0.6884999871253967, |
|
"eval_rewards/chosen": -0.9060052037239075, |
|
"eval_rewards/margins": 0.38844582438468933, |
|
"eval_rewards/rejected": -1.294451117515564, |
|
"eval_runtime": 305.4694, |
|
"eval_samples_per_second": 6.547, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.5783826223501701, |
|
"grad_norm": 21.576183319091797, |
|
"learning_rate": 2.253678359193278e-07, |
|
"logits/chosen": -2.707562208175659, |
|
"logits/rejected": -2.6342368125915527, |
|
"logps/chosen": -451.54180908203125, |
|
"logps/rejected": -455.8236389160156, |
|
"loss": 0.5991, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9837636947631836, |
|
"rewards/margins": 0.3827388882637024, |
|
"rewards/rejected": -1.3665026426315308, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.5809997382884062, |
|
"grad_norm": 20.301193237304688, |
|
"learning_rate": 2.230955492793149e-07, |
|
"logits/chosen": -2.5531551837921143, |
|
"logits/rejected": -2.5302395820617676, |
|
"logps/chosen": -448.05279541015625, |
|
"logps/rejected": -460.99267578125, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0065380334854126, |
|
"rewards/margins": 0.3645462393760681, |
|
"rewards/rejected": -1.371084451675415, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.5836168542266422, |
|
"grad_norm": 22.607479095458984, |
|
"learning_rate": 2.2082550915319468e-07, |
|
"logits/chosen": -2.5578253269195557, |
|
"logits/rejected": -2.5488719940185547, |
|
"logps/chosen": -447.6844787597656, |
|
"logps/rejected": -438.8826599121094, |
|
"loss": 0.5936, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9693658947944641, |
|
"rewards/margins": 0.4336482882499695, |
|
"rewards/rejected": -1.403014063835144, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.5862339701648783, |
|
"grad_norm": 20.127931594848633, |
|
"learning_rate": 2.1855790508866433e-07, |
|
"logits/chosen": -2.6090965270996094, |
|
"logits/rejected": -2.596123218536377, |
|
"logps/chosen": -463.4794006347656, |
|
"logps/rejected": -465.2300720214844, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.9245331883430481, |
|
"rewards/margins": 0.33225584030151367, |
|
"rewards/rejected": -1.256788969039917, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.5888510861031143, |
|
"grad_norm": 15.41345500946045, |
|
"learning_rate": 2.162929264300107e-07, |
|
"logits/chosen": -2.6332390308380127, |
|
"logits/rejected": -2.6083428859710693, |
|
"logps/chosen": -422.32220458984375, |
|
"logps/rejected": -420.503662109375, |
|
"loss": 0.5636, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.8126707077026367, |
|
"rewards/margins": 0.4325372576713562, |
|
"rewards/rejected": -1.2452080249786377, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.5914682020413504, |
|
"grad_norm": 22.760326385498047, |
|
"learning_rate": 2.1403076230230005e-07, |
|
"logits/chosen": -2.6489734649658203, |
|
"logits/rejected": -2.622544288635254, |
|
"logps/chosen": -430.8290100097656, |
|
"logps/rejected": -420.72613525390625, |
|
"loss": 0.626, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.867121696472168, |
|
"rewards/margins": 0.30833083391189575, |
|
"rewards/rejected": -1.1754525899887085, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.5940853179795865, |
|
"grad_norm": 26.066482543945312, |
|
"learning_rate": 2.1177160159558596e-07, |
|
"logits/chosen": -2.634918212890625, |
|
"logits/rejected": -2.56962251663208, |
|
"logps/chosen": -447.6597595214844, |
|
"logps/rejected": -419.47430419921875, |
|
"loss": 0.5887, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.9099095463752747, |
|
"rewards/margins": 0.41119471192359924, |
|
"rewards/rejected": -1.3211042881011963, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.5967024339178225, |
|
"grad_norm": 26.43048858642578, |
|
"learning_rate": 2.0951563294913734e-07, |
|
"logits/chosen": -2.65494704246521, |
|
"logits/rejected": -2.5803942680358887, |
|
"logps/chosen": -423.8819274902344, |
|
"logps/rejected": -414.9833068847656, |
|
"loss": 0.5454, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.8593618273735046, |
|
"rewards/margins": 0.4740574359893799, |
|
"rewards/rejected": -1.3334193229675293, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.5993195498560586, |
|
"grad_norm": 24.643842697143555, |
|
"learning_rate": 2.072630447356869e-07, |
|
"logits/chosen": -2.598431348800659, |
|
"logits/rejected": -2.5783185958862305, |
|
"logps/chosen": -422.7080078125, |
|
"logps/rejected": -415.7620544433594, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9891357421875, |
|
"rewards/margins": 0.3770177960395813, |
|
"rewards/rejected": -1.3661534786224365, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"grad_norm": 24.30388832092285, |
|
"learning_rate": 2.0501402504570232e-07, |
|
"logits/chosen": -2.6597847938537598, |
|
"logits/rejected": -2.590627908706665, |
|
"logps/chosen": -457.49249267578125, |
|
"logps/rejected": -459.54620361328125, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0667564868927002, |
|
"rewards/margins": 0.49383625388145447, |
|
"rewards/rejected": -1.5605928897857666, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6019366657942947, |
|
"eval_logits/chosen": -2.5756797790527344, |
|
"eval_logits/rejected": -2.5398993492126465, |
|
"eval_logps/chosen": -447.8605041503906, |
|
"eval_logps/rejected": -451.0841064453125, |
|
"eval_loss": 0.5913601517677307, |
|
"eval_rewards/accuracies": 0.6865000128746033, |
|
"eval_rewards/chosen": -1.11641526222229, |
|
"eval_rewards/margins": 0.42337724566459656, |
|
"eval_rewards/rejected": -1.5397926568984985, |
|
"eval_runtime": 305.5859, |
|
"eval_samples_per_second": 6.545, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.6045537817325307, |
|
"grad_norm": 23.54559898376465, |
|
"learning_rate": 2.027687616716804e-07, |
|
"logits/chosen": -2.54463529586792, |
|
"logits/rejected": -2.5222010612487793, |
|
"logps/chosen": -398.11944580078125, |
|
"logps/rejected": -391.347412109375, |
|
"loss": 0.5937, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1046133041381836, |
|
"rewards/margins": 0.42002907395362854, |
|
"rewards/rejected": -1.5246422290802002, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.6071708976707668, |
|
"grad_norm": 27.417768478393555, |
|
"learning_rate": 2.005274420924668e-07, |
|
"logits/chosen": -2.6474318504333496, |
|
"logits/rejected": -2.5949997901916504, |
|
"logps/chosen": -436.841064453125, |
|
"logps/rejected": -420.890380859375, |
|
"loss": 0.6072, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.06412672996521, |
|
"rewards/margins": 0.38113874197006226, |
|
"rewards/rejected": -1.445265293121338, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.6097880136090029, |
|
"grad_norm": 24.830379486083984, |
|
"learning_rate": 1.9829025345760121e-07, |
|
"logits/chosen": -2.622990369796753, |
|
"logits/rejected": -2.6124253273010254, |
|
"logps/chosen": -460.695556640625, |
|
"logps/rejected": -477.2328186035156, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -1.05497407913208, |
|
"rewards/margins": 0.3757147490978241, |
|
"rewards/rejected": -1.430688738822937, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.6124051295472389, |
|
"grad_norm": 25.735050201416016, |
|
"learning_rate": 1.960573825716911e-07, |
|
"logits/chosen": -2.5889840126037598, |
|
"logits/rejected": -2.547499656677246, |
|
"logps/chosen": -403.8297424316406, |
|
"logps/rejected": -414.8653259277344, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0478129386901855, |
|
"rewards/margins": 0.3687422275543213, |
|
"rewards/rejected": -1.4165551662445068, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.615022245485475, |
|
"grad_norm": 26.266759872436523, |
|
"learning_rate": 1.9382901587881273e-07, |
|
"logits/chosen": -2.63970685005188, |
|
"logits/rejected": -2.6058359146118164, |
|
"logps/chosen": -429.8356018066406, |
|
"logps/rejected": -418.068115234375, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.977154552936554, |
|
"rewards/margins": 0.4486463665962219, |
|
"rewards/rejected": -1.4258009195327759, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.6176393614237111, |
|
"grad_norm": 21.620264053344727, |
|
"learning_rate": 1.9160533944694364e-07, |
|
"logits/chosen": -2.6155648231506348, |
|
"logits/rejected": -2.558945894241333, |
|
"logps/chosen": -429.2547912597656, |
|
"logps/rejected": -438.4996032714844, |
|
"loss": 0.5524, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8521722555160522, |
|
"rewards/margins": 0.5101855397224426, |
|
"rewards/rejected": -1.3623578548431396, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.6202564773619471, |
|
"grad_norm": 20.288921356201172, |
|
"learning_rate": 1.8938653895242602e-07, |
|
"logits/chosen": -2.601743221282959, |
|
"logits/rejected": -2.5524344444274902, |
|
"logps/chosen": -433.3799743652344, |
|
"logps/rejected": -439.91143798828125, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.9650028944015503, |
|
"rewards/margins": 0.5604956150054932, |
|
"rewards/rejected": -1.525498628616333, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.6228735933001832, |
|
"grad_norm": 28.30954933166504, |
|
"learning_rate": 1.8717279966446264e-07, |
|
"logits/chosen": -2.4995055198669434, |
|
"logits/rejected": -2.4924209117889404, |
|
"logps/chosen": -416.691650390625, |
|
"logps/rejected": -430.76373291015625, |
|
"loss": 0.6314, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0411570072174072, |
|
"rewards/margins": 0.34819602966308594, |
|
"rewards/rejected": -1.3893530368804932, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.6254907092384192, |
|
"grad_norm": 25.23048210144043, |
|
"learning_rate": 1.8496430642964694e-07, |
|
"logits/chosen": -2.5988945960998535, |
|
"logits/rejected": -2.5367140769958496, |
|
"logps/chosen": -437.4556579589844, |
|
"logps/rejected": -441.42694091796875, |
|
"loss": 0.5833, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9659013748168945, |
|
"rewards/margins": 0.42699605226516724, |
|
"rewards/rejected": -1.392897367477417, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"grad_norm": 34.421661376953125, |
|
"learning_rate": 1.8276124365652855e-07, |
|
"logits/chosen": -2.5923240184783936, |
|
"logits/rejected": -2.53601336479187, |
|
"logps/chosen": -428.5076599121094, |
|
"logps/rejected": -437.510498046875, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.9861478805541992, |
|
"rewards/margins": 0.4693332314491272, |
|
"rewards/rejected": -1.4554810523986816, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6281078251766553, |
|
"eval_logits/chosen": -2.5486767292022705, |
|
"eval_logits/rejected": -2.5120887756347656, |
|
"eval_logps/chosen": -439.6860656738281, |
|
"eval_logps/rejected": -442.0413513183594, |
|
"eval_loss": 0.590362012386322, |
|
"eval_rewards/accuracies": 0.6859999895095825, |
|
"eval_rewards/chosen": -1.0346707105636597, |
|
"eval_rewards/margins": 0.41469448804855347, |
|
"eval_rewards/rejected": -1.449365258216858, |
|
"eval_runtime": 305.5078, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.6307249411148914, |
|
"grad_norm": 23.83245086669922, |
|
"learning_rate": 1.805637953002149e-07, |
|
"logits/chosen": -2.6228480339050293, |
|
"logits/rejected": -2.5939719676971436, |
|
"logps/chosen": -411.1646423339844, |
|
"logps/rejected": -414.8374938964844, |
|
"loss": 0.5865, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0131409168243408, |
|
"rewards/margins": 0.4185038208961487, |
|
"rewards/rejected": -1.4316446781158447, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.6333420570531274, |
|
"grad_norm": 28.81045150756836, |
|
"learning_rate": 1.7837214484701153e-07, |
|
"logits/chosen": -2.615571975708008, |
|
"logits/rejected": -2.6004979610443115, |
|
"logps/chosen": -421.10235595703125, |
|
"logps/rejected": -409.6927185058594, |
|
"loss": 0.5985, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.9161368608474731, |
|
"rewards/margins": 0.39567703008651733, |
|
"rewards/rejected": -1.3118139505386353, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.6359591729913635, |
|
"grad_norm": 26.689922332763672, |
|
"learning_rate": 1.761864752991004e-07, |
|
"logits/chosen": -2.626842975616455, |
|
"logits/rejected": -2.571432590484619, |
|
"logps/chosen": -425.1255798339844, |
|
"logps/rejected": -433.8043518066406, |
|
"loss": 0.587, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.967668890953064, |
|
"rewards/margins": 0.38932132720947266, |
|
"rewards/rejected": -1.3569902181625366, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.6385762889295996, |
|
"grad_norm": 24.693151473999023, |
|
"learning_rate": 1.7400696915925995e-07, |
|
"logits/chosen": -2.6202292442321777, |
|
"logits/rejected": -2.5344481468200684, |
|
"logps/chosen": -441.83404541015625, |
|
"logps/rejected": -409.7845764160156, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.9791529774665833, |
|
"rewards/margins": 0.497478187084198, |
|
"rewards/rejected": -1.4766310453414917, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.6411934048678356, |
|
"grad_norm": 29.49846076965332, |
|
"learning_rate": 1.718338084156254e-07, |
|
"logits/chosen": -2.4964375495910645, |
|
"logits/rejected": -2.4695093631744385, |
|
"logps/chosen": -455.32952880859375, |
|
"logps/rejected": -444.91778564453125, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9237340688705444, |
|
"rewards/margins": 0.5019347071647644, |
|
"rewards/rejected": -1.425668716430664, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.6438105208060717, |
|
"grad_norm": 21.89922332763672, |
|
"learning_rate": 1.696671745264937e-07, |
|
"logits/chosen": -2.616865396499634, |
|
"logits/rejected": -2.570014715194702, |
|
"logps/chosen": -446.42449951171875, |
|
"logps/rejected": -421.83251953125, |
|
"loss": 0.5447, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.9398025274276733, |
|
"rewards/margins": 0.527073085308075, |
|
"rewards/rejected": -1.466875672340393, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.6464276367443078, |
|
"grad_norm": 31.102645874023438, |
|
"learning_rate": 1.67507248405171e-07, |
|
"logits/chosen": -2.581944227218628, |
|
"logits/rejected": -2.5663084983825684, |
|
"logps/chosen": -431.9105529785156, |
|
"logps/rejected": -461.98876953125, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9992059469223022, |
|
"rewards/margins": 0.4130525588989258, |
|
"rewards/rejected": -1.412258505821228, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.6490447526825438, |
|
"grad_norm": 32.388267517089844, |
|
"learning_rate": 1.6535421040486683e-07, |
|
"logits/chosen": -2.458714485168457, |
|
"logits/rejected": -2.4205844402313232, |
|
"logps/chosen": -415.0240173339844, |
|
"logps/rejected": -413.59429931640625, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.9899314045906067, |
|
"rewards/margins": 0.49838346242904663, |
|
"rewards/rejected": -1.4883147478103638, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.6516618686207799, |
|
"grad_norm": 21.825029373168945, |
|
"learning_rate": 1.6320824030363456e-07, |
|
"logits/chosen": -2.5214855670928955, |
|
"logits/rejected": -2.5148208141326904, |
|
"logps/chosen": -409.963134765625, |
|
"logps/rejected": -417.6705017089844, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.0183316469192505, |
|
"rewards/margins": 0.4151875376701355, |
|
"rewards/rejected": -1.4335191249847412, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"grad_norm": 27.251855850219727, |
|
"learning_rate": 1.6106951728936024e-07, |
|
"logits/chosen": -2.5894880294799805, |
|
"logits/rejected": -2.5418648719787598, |
|
"logps/chosen": -421.0638732910156, |
|
"logps/rejected": -455.240966796875, |
|
"loss": 0.5306, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.922796905040741, |
|
"rewards/margins": 0.5818823575973511, |
|
"rewards/rejected": -1.5046792030334473, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.654278984559016, |
|
"eval_logits/chosen": -2.510161876678467, |
|
"eval_logits/rejected": -2.469223976135254, |
|
"eval_logps/chosen": -440.859130859375, |
|
"eval_logps/rejected": -445.5005187988281, |
|
"eval_loss": 0.5917896628379822, |
|
"eval_rewards/accuracies": 0.6825000047683716, |
|
"eval_rewards/chosen": -1.0464012622833252, |
|
"eval_rewards/margins": 0.43755561113357544, |
|
"eval_rewards/rejected": -1.4839569330215454, |
|
"eval_runtime": 305.4345, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.819, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.656896100497252, |
|
"grad_norm": 28.463520050048828, |
|
"learning_rate": 1.5893821994479994e-07, |
|
"logits/chosen": -2.5858142375946045, |
|
"logits/rejected": -2.5548527240753174, |
|
"logps/chosen": -465.49346923828125, |
|
"logps/rejected": -450.70831298828125, |
|
"loss": 0.5918, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0464518070220947, |
|
"rewards/margins": 0.4308743476867676, |
|
"rewards/rejected": -1.4773260354995728, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.6595132164354881, |
|
"grad_norm": 29.278364181518555, |
|
"learning_rate": 1.5681452623266867e-07, |
|
"logits/chosen": -2.5009493827819824, |
|
"logits/rejected": -2.4307141304016113, |
|
"logps/chosen": -475.93963623046875, |
|
"logps/rejected": -456.0147399902344, |
|
"loss": 0.5432, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.12712824344635, |
|
"rewards/margins": 0.5893834829330444, |
|
"rewards/rejected": -1.7165117263793945, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.6621303323737242, |
|
"grad_norm": 31.27610206604004, |
|
"learning_rate": 1.546986134807801e-07, |
|
"logits/chosen": -2.5525741577148438, |
|
"logits/rejected": -2.480517864227295, |
|
"logps/chosen": -429.0491638183594, |
|
"logps/rejected": -453.71417236328125, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1591657400131226, |
|
"rewards/margins": 0.501166582107544, |
|
"rewards/rejected": -1.6603323221206665, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.6647474483119602, |
|
"grad_norm": 18.123111724853516, |
|
"learning_rate": 1.5259065836724034e-07, |
|
"logits/chosen": -2.4638514518737793, |
|
"logits/rejected": -2.4330999851226807, |
|
"logps/chosen": -429.5963439941406, |
|
"logps/rejected": -456.29827880859375, |
|
"loss": 0.5854, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1803653240203857, |
|
"rewards/margins": 0.48663124442100525, |
|
"rewards/rejected": -1.6669965982437134, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.6673645642501963, |
|
"grad_norm": 36.21045684814453, |
|
"learning_rate": 1.5049083690569454e-07, |
|
"logits/chosen": -2.469520330429077, |
|
"logits/rejected": -2.4402925968170166, |
|
"logps/chosen": -426.5743713378906, |
|
"logps/rejected": -462.9029235839844, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.2028522491455078, |
|
"rewards/margins": 0.5434964299201965, |
|
"rewards/rejected": -1.7463487386703491, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.6699816801884323, |
|
"grad_norm": 30.517663955688477, |
|
"learning_rate": 1.4839932443063056e-07, |
|
"logits/chosen": -2.4739162921905518, |
|
"logits/rejected": -2.4111859798431396, |
|
"logps/chosen": -488.611572265625, |
|
"logps/rejected": -463.06329345703125, |
|
"loss": 0.5618, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.2092149257659912, |
|
"rewards/margins": 0.5156753659248352, |
|
"rewards/rejected": -1.7248903512954712, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.6725987961266684, |
|
"grad_norm": 35.15083312988281, |
|
"learning_rate": 1.46316295582738e-07, |
|
"logits/chosen": -2.486800193786621, |
|
"logits/rejected": -2.4465322494506836, |
|
"logps/chosen": -431.0811462402344, |
|
"logps/rejected": -436.2640075683594, |
|
"loss": 0.6517, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.2838318347930908, |
|
"rewards/margins": 0.31887996196746826, |
|
"rewards/rejected": -1.6027119159698486, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.6752159120649045, |
|
"grad_norm": 27.834619522094727, |
|
"learning_rate": 1.4424192429432655e-07, |
|
"logits/chosen": -2.5374560356140137, |
|
"logits/rejected": -2.4828081130981445, |
|
"logps/chosen": -433.96722412109375, |
|
"logps/rejected": -470.0076599121094, |
|
"loss": 0.5697, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0254337787628174, |
|
"rewards/margins": 0.4902397692203522, |
|
"rewards/rejected": -1.5156733989715576, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.6778330280031405, |
|
"grad_norm": 35.93947219848633, |
|
"learning_rate": 1.4217638377480158e-07, |
|
"logits/chosen": -2.4948618412017822, |
|
"logits/rejected": -2.4619128704071045, |
|
"logps/chosen": -418.11517333984375, |
|
"logps/rejected": -444.2643127441406, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0571857690811157, |
|
"rewards/margins": 0.48725780844688416, |
|
"rewards/rejected": -1.5444434881210327, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"grad_norm": 26.80233383178711, |
|
"learning_rate": 1.401198464962021e-07, |
|
"logits/chosen": -2.5296876430511475, |
|
"logits/rejected": -2.4376063346862793, |
|
"logps/chosen": -448.37811279296875, |
|
"logps/rejected": -441.19036865234375, |
|
"loss": 0.5762, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0862284898757935, |
|
"rewards/margins": 0.4645492434501648, |
|
"rewards/rejected": -1.550777792930603, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6804501439413766, |
|
"eval_logits/chosen": -2.473548412322998, |
|
"eval_logits/rejected": -2.429135799407959, |
|
"eval_logps/chosen": -443.086181640625, |
|
"eval_logps/rejected": -448.5192565917969, |
|
"eval_loss": 0.5926596522331238, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": -1.068671703338623, |
|
"eval_rewards/margins": 0.4454721510410309, |
|
"eval_rewards/rejected": -1.5141440629959106, |
|
"eval_runtime": 305.3934, |
|
"eval_samples_per_second": 6.549, |
|
"eval_steps_per_second": 0.819, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.6830672598796127, |
|
"grad_norm": 26.471881866455078, |
|
"learning_rate": 1.3807248417879894e-07, |
|
"logits/chosen": -2.563028335571289, |
|
"logits/rejected": -2.5249762535095215, |
|
"logps/chosen": -456.01922607421875, |
|
"logps/rejected": -460.52679443359375, |
|
"loss": 0.5634, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0320606231689453, |
|
"rewards/margins": 0.5281775593757629, |
|
"rewards/rejected": -1.5602381229400635, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.6856843758178487, |
|
"grad_norm": 41.65851974487305, |
|
"learning_rate": 1.3603446777675665e-07, |
|
"logits/chosen": -2.4479596614837646, |
|
"logits/rejected": -2.421311855316162, |
|
"logps/chosen": -440.4335021972656, |
|
"logps/rejected": -441.00592041015625, |
|
"loss": 0.6009, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.09227454662323, |
|
"rewards/margins": 0.42088931798934937, |
|
"rewards/rejected": -1.5131638050079346, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.6883014917560848, |
|
"grad_norm": 27.369688034057617, |
|
"learning_rate": 1.3400596746385814e-07, |
|
"logits/chosen": -2.5403740406036377, |
|
"logits/rejected": -2.475461483001709, |
|
"logps/chosen": -450.69293212890625, |
|
"logps/rejected": -447.13330078125, |
|
"loss": 0.594, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.0313596725463867, |
|
"rewards/margins": 0.4859469532966614, |
|
"rewards/rejected": -1.5173065662384033, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.6909186076943209, |
|
"grad_norm": 35.06747817993164, |
|
"learning_rate": 1.3198715261929586e-07, |
|
"logits/chosen": -2.5465633869171143, |
|
"logits/rejected": -2.5011157989501953, |
|
"logps/chosen": -413.10919189453125, |
|
"logps/rejected": -429.83404541015625, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.0741420984268188, |
|
"rewards/margins": 0.4673934578895569, |
|
"rewards/rejected": -1.541535496711731, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.6935357236325569, |
|
"grad_norm": 31.834104537963867, |
|
"learning_rate": 1.299781918135282e-07, |
|
"logits/chosen": -2.534392833709717, |
|
"logits/rejected": -2.473947525024414, |
|
"logps/chosen": -478.8719787597656, |
|
"logps/rejected": -493.5030822753906, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.9548946619033813, |
|
"rewards/margins": 0.6267498731613159, |
|
"rewards/rejected": -1.5816442966461182, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.696152839570793, |
|
"grad_norm": 35.67092514038086, |
|
"learning_rate": 1.279792527942045e-07, |
|
"logits/chosen": -2.5275771617889404, |
|
"logits/rejected": -2.448570728302002, |
|
"logps/chosen": -452.7754821777344, |
|
"logps/rejected": -476.6175842285156, |
|
"loss": 0.5481, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0713622570037842, |
|
"rewards/margins": 0.5786231756210327, |
|
"rewards/rejected": -1.6499855518341064, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.6987699555090291, |
|
"grad_norm": 30.155054092407227, |
|
"learning_rate": 1.259905024721576e-07, |
|
"logits/chosen": -2.477794647216797, |
|
"logits/rejected": -2.4506518840789795, |
|
"logps/chosen": -427.8697814941406, |
|
"logps/rejected": -444.8448791503906, |
|
"loss": 0.5246, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0339616537094116, |
|
"rewards/margins": 0.6002413034439087, |
|
"rewards/rejected": -1.6342031955718994, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.7013870714472651, |
|
"grad_norm": 28.638723373413086, |
|
"learning_rate": 1.2401210690746703e-07, |
|
"logits/chosen": -2.480868101119995, |
|
"logits/rejected": -2.4245822429656982, |
|
"logps/chosen": -444.09613037109375, |
|
"logps/rejected": -430.0035095214844, |
|
"loss": 0.5973, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9939875602722168, |
|
"rewards/margins": 0.3904542922973633, |
|
"rewards/rejected": -1.38444185256958, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.7040041873855012, |
|
"grad_norm": 33.430381774902344, |
|
"learning_rate": 1.2204423129559305e-07, |
|
"logits/chosen": -2.5521812438964844, |
|
"logits/rejected": -2.558464765548706, |
|
"logps/chosen": -435.12530517578125, |
|
"logps/rejected": -474.42022705078125, |
|
"loss": 0.5872, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.00673508644104, |
|
"rewards/margins": 0.5006878972053528, |
|
"rewards/rejected": -1.5074230432510376, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"grad_norm": 30.671358108520508, |
|
"learning_rate": 1.2008703995358299e-07, |
|
"logits/chosen": -2.5638155937194824, |
|
"logits/rejected": -2.5175604820251465, |
|
"logps/chosen": -432.1807556152344, |
|
"logps/rejected": -433.16790771484375, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9962761998176575, |
|
"rewards/margins": 0.4422995448112488, |
|
"rewards/rejected": -1.4385757446289062, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7066213033237373, |
|
"eval_logits/chosen": -2.474745273590088, |
|
"eval_logits/rejected": -2.4329495429992676, |
|
"eval_logps/chosen": -443.8889465332031, |
|
"eval_logps/rejected": -447.90631103515625, |
|
"eval_loss": 0.5935620069503784, |
|
"eval_rewards/accuracies": 0.6800000071525574, |
|
"eval_rewards/chosen": -1.0766992568969727, |
|
"eval_rewards/margins": 0.43131566047668457, |
|
"eval_rewards/rejected": -1.5080151557922363, |
|
"eval_runtime": 305.4592, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.7092384192619733, |
|
"grad_norm": 21.97296905517578, |
|
"learning_rate": 1.1814069630635068e-07, |
|
"logits/chosen": -2.4879977703094482, |
|
"logits/rejected": -2.474510431289673, |
|
"logps/chosen": -442.66912841796875, |
|
"logps/rejected": -471.7681579589844, |
|
"loss": 0.5934, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.020108699798584, |
|
"rewards/margins": 0.45263218879699707, |
|
"rewards/rejected": -1.472740888595581, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.7118555352002094, |
|
"grad_norm": 23.306360244750977, |
|
"learning_rate": 1.1620536287303051e-07, |
|
"logits/chosen": -2.53651762008667, |
|
"logits/rejected": -2.4935238361358643, |
|
"logps/chosen": -478.90570068359375, |
|
"logps/rejected": -468.0852966308594, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.0745232105255127, |
|
"rewards/margins": 0.37477684020996094, |
|
"rewards/rejected": -1.4493000507354736, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.7144726511384454, |
|
"grad_norm": 20.9937744140625, |
|
"learning_rate": 1.1428120125340716e-07, |
|
"logits/chosen": -2.548900604248047, |
|
"logits/rejected": -2.488852024078369, |
|
"logps/chosen": -430.0403747558594, |
|
"logps/rejected": -420.0286560058594, |
|
"loss": 0.5506, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0618057250976562, |
|
"rewards/margins": 0.5464845895767212, |
|
"rewards/rejected": -1.608290433883667, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.7170897670766815, |
|
"grad_norm": 26.31341552734375, |
|
"learning_rate": 1.123683721144223e-07, |
|
"logits/chosen": -2.536130666732788, |
|
"logits/rejected": -2.496840715408325, |
|
"logps/chosen": -471.9339294433594, |
|
"logps/rejected": -469.2123107910156, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.082291841506958, |
|
"rewards/margins": 0.47875848412513733, |
|
"rewards/rejected": -1.561050295829773, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.7197068830149176, |
|
"grad_norm": 18.80337142944336, |
|
"learning_rate": 1.1046703517675845e-07, |
|
"logits/chosen": -2.521416425704956, |
|
"logits/rejected": -2.5193381309509277, |
|
"logps/chosen": -423.4697265625, |
|
"logps/rejected": -469.30743408203125, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.000197172164917, |
|
"rewards/margins": 0.44857126474380493, |
|
"rewards/rejected": -1.4487683773040771, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.7223239989531536, |
|
"grad_norm": 24.486021041870117, |
|
"learning_rate": 1.085773492015028e-07, |
|
"logits/chosen": -2.509730815887451, |
|
"logits/rejected": -2.452089786529541, |
|
"logps/chosen": -424.95355224609375, |
|
"logps/rejected": -419.2748107910156, |
|
"loss": 0.5638, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0358409881591797, |
|
"rewards/margins": 0.4873877167701721, |
|
"rewards/rejected": -1.5232288837432861, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.7249411148913897, |
|
"grad_norm": 30.51848030090332, |
|
"learning_rate": 1.0669947197689033e-07, |
|
"logits/chosen": -2.5046708583831787, |
|
"logits/rejected": -2.434422731399536, |
|
"logps/chosen": -449.0489196777344, |
|
"logps/rejected": -453.250732421875, |
|
"loss": 0.558, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.060209035873413, |
|
"rewards/margins": 0.5281985402107239, |
|
"rewards/rejected": -1.5884075164794922, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.7275582308296258, |
|
"grad_norm": 28.53468132019043, |
|
"learning_rate": 1.048335603051291e-07, |
|
"logits/chosen": -2.508882761001587, |
|
"logits/rejected": -2.462200403213501, |
|
"logps/chosen": -468.8936462402344, |
|
"logps/rejected": -488.55438232421875, |
|
"loss": 0.5138, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -1.091536521911621, |
|
"rewards/margins": 0.6539732217788696, |
|
"rewards/rejected": -1.7455097436904907, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.7301753467678618, |
|
"grad_norm": 27.401844024658203, |
|
"learning_rate": 1.0297976998930663e-07, |
|
"logits/chosen": -2.515787363052368, |
|
"logits/rejected": -2.470237970352173, |
|
"logps/chosen": -447.158935546875, |
|
"logps/rejected": -455.6554260253906, |
|
"loss": 0.5452, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0867551565170288, |
|
"rewards/margins": 0.6032642126083374, |
|
"rewards/rejected": -1.6900192499160767, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"grad_norm": 32.95134353637695, |
|
"learning_rate": 1.0113825582038077e-07, |
|
"logits/chosen": -2.5129332542419434, |
|
"logits/rejected": -2.466841220855713, |
|
"logps/chosen": -468.6444396972656, |
|
"logps/rejected": -477.91632080078125, |
|
"loss": 0.6068, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.3479645252227783, |
|
"rewards/margins": 0.4015529155731201, |
|
"rewards/rejected": -1.7495174407958984, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.7327924627060979, |
|
"eval_logits/chosen": -2.4707725048065186, |
|
"eval_logits/rejected": -2.4294190406799316, |
|
"eval_logps/chosen": -455.2722473144531, |
|
"eval_logps/rejected": -461.43121337890625, |
|
"eval_loss": 0.5897455811500549, |
|
"eval_rewards/accuracies": 0.6819999814033508, |
|
"eval_rewards/chosen": -1.1905323266983032, |
|
"eval_rewards/margins": 0.4527316391468048, |
|
"eval_rewards/rejected": -1.6432641744613647, |
|
"eval_runtime": 305.6038, |
|
"eval_samples_per_second": 6.544, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.735409578644334, |
|
"grad_norm": 22.0037784576416, |
|
"learning_rate": 9.930917156425475e-08, |
|
"logits/chosen": -2.528698682785034, |
|
"logits/rejected": -2.490994930267334, |
|
"logps/chosen": -452.98828125, |
|
"logps/rejected": -478.26873779296875, |
|
"loss": 0.5954, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2057301998138428, |
|
"rewards/margins": 0.4493107795715332, |
|
"rewards/rejected": -1.655040979385376, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.73802669458257, |
|
"grad_norm": 23.0594539642334, |
|
"learning_rate": 9.749266994893754e-08, |
|
"logits/chosen": -2.498945713043213, |
|
"logits/rejected": -2.4089159965515137, |
|
"logps/chosen": -427.6974182128906, |
|
"logps/rejected": -433.5654296875, |
|
"loss": 0.6381, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1993119716644287, |
|
"rewards/margins": 0.3153248429298401, |
|
"rewards/rejected": -1.514636754989624, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.7406438105208061, |
|
"grad_norm": 31.801725387573242, |
|
"learning_rate": 9.568890265179128e-08, |
|
"logits/chosen": -2.500518798828125, |
|
"logits/rejected": -2.4740939140319824, |
|
"logps/chosen": -455.19140625, |
|
"logps/rejected": -443.7815856933594, |
|
"loss": 0.615, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1849435567855835, |
|
"rewards/margins": 0.42013731598854065, |
|
"rewards/rejected": -1.6050809621810913, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.7432609264590422, |
|
"grad_norm": 24.575345993041992, |
|
"learning_rate": 9.389802028686616e-08, |
|
"logits/chosen": -2.5450186729431152, |
|
"logits/rejected": -2.514194965362549, |
|
"logps/chosen": -447.0918884277344, |
|
"logps/rejected": -432.509521484375, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.1515171527862549, |
|
"rewards/margins": 0.3097633123397827, |
|
"rewards/rejected": -1.461280345916748, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.7458780423972782, |
|
"grad_norm": 37.35166931152344, |
|
"learning_rate": 9.212017239232426e-08, |
|
"logits/chosen": -2.5267508029937744, |
|
"logits/rejected": -2.511732816696167, |
|
"logps/chosen": -450.9908752441406, |
|
"logps/rejected": -463.26214599609375, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.0432413816452026, |
|
"rewards/margins": 0.5652891397476196, |
|
"rewards/rejected": -1.6085306406021118, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.7484951583355143, |
|
"grad_norm": 30.909400939941406, |
|
"learning_rate": 9.035550741795328e-08, |
|
"logits/chosen": -2.5176095962524414, |
|
"logits/rejected": -2.512455940246582, |
|
"logps/chosen": -430.74078369140625, |
|
"logps/rejected": -466.84979248046875, |
|
"loss": 0.5908, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0033584833145142, |
|
"rewards/margins": 0.47517308592796326, |
|
"rewards/rejected": -1.4785315990447998, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.7511122742737504, |
|
"grad_norm": 24.312652587890625, |
|
"learning_rate": 8.860417271277065e-08, |
|
"logits/chosen": -2.5723533630371094, |
|
"logits/rejected": -2.5612332820892334, |
|
"logps/chosen": -448.51092529296875, |
|
"logps/rejected": -463.96258544921875, |
|
"loss": 0.595, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.025071620941162, |
|
"rewards/margins": 0.38559845089912415, |
|
"rewards/rejected": -1.4106700420379639, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.7537293902119864, |
|
"grad_norm": 30.318265914916992, |
|
"learning_rate": 8.686631451272029e-08, |
|
"logits/chosen": -2.547738552093506, |
|
"logits/rejected": -2.5238020420074463, |
|
"logps/chosen": -440.672607421875, |
|
"logps/rejected": -445.39593505859375, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.2060959339141846, |
|
"rewards/margins": 0.419172465801239, |
|
"rewards/rejected": -1.625268578529358, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.7563465061502225, |
|
"grad_norm": 33.638999938964844, |
|
"learning_rate": 8.514207792846168e-08, |
|
"logits/chosen": -2.562272787094116, |
|
"logits/rejected": -2.533822536468506, |
|
"logps/chosen": -436.4820251464844, |
|
"logps/rejected": -438.751708984375, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.1529896259307861, |
|
"rewards/margins": 0.4592018127441406, |
|
"rewards/rejected": -1.6121914386749268, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"grad_norm": 22.52386474609375, |
|
"learning_rate": 8.343160693325355e-08, |
|
"logits/chosen": -2.5034773349761963, |
|
"logits/rejected": -2.4751226902008057, |
|
"logps/chosen": -441.08843994140625, |
|
"logps/rejected": -471.1349182128906, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1367791891098022, |
|
"rewards/margins": 0.4893072247505188, |
|
"rewards/rejected": -1.6260864734649658, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7589636220884585, |
|
"eval_logits/chosen": -2.4862163066864014, |
|
"eval_logits/rejected": -2.4469528198242188, |
|
"eval_logps/chosen": -448.6697082519531, |
|
"eval_logps/rejected": -453.08331298828125, |
|
"eval_loss": 0.5870286822319031, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": -1.1245074272155762, |
|
"eval_rewards/margins": 0.43527737259864807, |
|
"eval_rewards/rejected": -1.5597847700119019, |
|
"eval_runtime": 305.4546, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.818, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.7615807380266946, |
|
"grad_norm": 26.818082809448242, |
|
"learning_rate": 8.173504435093173e-08, |
|
"logits/chosen": -2.4929020404815674, |
|
"logits/rejected": -2.425166606903076, |
|
"logps/chosen": -418.92266845703125, |
|
"logps/rejected": -419.7470703125, |
|
"loss": 0.5526, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.0992368459701538, |
|
"rewards/margins": 0.522510826587677, |
|
"rewards/rejected": -1.621747612953186, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.7641978539649307, |
|
"grad_norm": 21.685863494873047, |
|
"learning_rate": 8.005253184398359e-08, |
|
"logits/chosen": -2.5369515419006348, |
|
"logits/rejected": -2.4856951236724854, |
|
"logps/chosen": -464.41748046875, |
|
"logps/rejected": -482.64569091796875, |
|
"loss": 0.5998, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.0638659000396729, |
|
"rewards/margins": 0.4084245264530182, |
|
"rewards/rejected": -1.4722901582717896, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.7668149699031667, |
|
"grad_norm": 26.51576042175293, |
|
"learning_rate": 7.838420990171926e-08, |
|
"logits/chosen": -2.5660033226013184, |
|
"logits/rejected": -2.5116262435913086, |
|
"logps/chosen": -452.36541748046875, |
|
"logps/rejected": -460.9500427246094, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.042595386505127, |
|
"rewards/margins": 0.4849773049354553, |
|
"rewards/rejected": -1.527572751045227, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.7694320858414028, |
|
"grad_norm": 21.060117721557617, |
|
"learning_rate": 7.673021782854083e-08, |
|
"logits/chosen": -2.4519286155700684, |
|
"logits/rejected": -2.4212234020233154, |
|
"logps/chosen": -442.04351806640625, |
|
"logps/rejected": -423.04547119140625, |
|
"loss": 0.568, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.062628149986267, |
|
"rewards/margins": 0.5063890218734741, |
|
"rewards/rejected": -1.5690171718597412, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.7720492017796389, |
|
"grad_norm": 29.852025985717773, |
|
"learning_rate": 7.509069373231039e-08, |
|
"logits/chosen": -2.4772579669952393, |
|
"logits/rejected": -2.4347732067108154, |
|
"logps/chosen": -440.98681640625, |
|
"logps/rejected": -446.54791259765625, |
|
"loss": 0.5898, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.1728198528289795, |
|
"rewards/margins": 0.4668344557285309, |
|
"rewards/rejected": -1.6396541595458984, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.7746663177178749, |
|
"grad_norm": 26.482280731201172, |
|
"learning_rate": 7.346577451281821e-08, |
|
"logits/chosen": -2.4865143299102783, |
|
"logits/rejected": -2.4856116771698, |
|
"logps/chosen": -455.3196716308594, |
|
"logps/rejected": -467.3148498535156, |
|
"loss": 0.56, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1614563465118408, |
|
"rewards/margins": 0.5401021838188171, |
|
"rewards/rejected": -1.7015584707260132, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.777283433656111, |
|
"grad_norm": 30.637746810913086, |
|
"learning_rate": 7.185559585035136e-08, |
|
"logits/chosen": -2.50410532951355, |
|
"logits/rejected": -2.441951274871826, |
|
"logps/chosen": -472.7854919433594, |
|
"logps/rejected": -494.48382568359375, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1519578695297241, |
|
"rewards/margins": 0.5563610792160034, |
|
"rewards/rejected": -1.7083189487457275, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.7799005495943471, |
|
"grad_norm": 23.6710147857666, |
|
"learning_rate": 7.026029219436502e-08, |
|
"logits/chosen": -2.52189040184021, |
|
"logits/rejected": -2.45927095413208, |
|
"logps/chosen": -436.1434631347656, |
|
"logps/rejected": -452.95794677734375, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1254907846450806, |
|
"rewards/margins": 0.49826329946517944, |
|
"rewards/rejected": -1.6237539052963257, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.7825176655325831, |
|
"grad_norm": 19.153810501098633, |
|
"learning_rate": 6.867999675225522e-08, |
|
"logits/chosen": -2.5626466274261475, |
|
"logits/rejected": -2.5076661109924316, |
|
"logps/chosen": -412.48858642578125, |
|
"logps/rejected": -431.7395935058594, |
|
"loss": 0.5615, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.1552931070327759, |
|
"rewards/margins": 0.5237449407577515, |
|
"rewards/rejected": -1.6790380477905273, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"grad_norm": 40.37303161621094, |
|
"learning_rate": 6.711484147823662e-08, |
|
"logits/chosen": -2.4564764499664307, |
|
"logits/rejected": -2.4580986499786377, |
|
"logps/chosen": -411.7818298339844, |
|
"logps/rejected": -465.6504821777344, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -1.116335153579712, |
|
"rewards/margins": 0.5623366832733154, |
|
"rewards/rejected": -1.6786715984344482, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7851347814708192, |
|
"eval_logits/chosen": -2.456458568572998, |
|
"eval_logits/rejected": -2.4160501956939697, |
|
"eval_logps/chosen": -458.4520568847656, |
|
"eval_logps/rejected": -464.20196533203125, |
|
"eval_loss": 0.5873444676399231, |
|
"eval_rewards/accuracies": 0.6869999766349792, |
|
"eval_rewards/chosen": -1.222330927848816, |
|
"eval_rewards/margins": 0.4486404359340668, |
|
"eval_rewards/rejected": -1.670971393585205, |
|
"eval_runtime": 305.68, |
|
"eval_samples_per_second": 6.543, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7877518974090553, |
|
"grad_norm": 20.812664031982422, |
|
"learning_rate": 6.556495706232412e-08, |
|
"logits/chosen": -2.4477975368499756, |
|
"logits/rejected": -2.460245132446289, |
|
"logps/chosen": -460.86468505859375, |
|
"logps/rejected": -472.31884765625, |
|
"loss": 0.597, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.2172662019729614, |
|
"rewards/margins": 0.4750109612941742, |
|
"rewards/rejected": -1.692277193069458, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.7903690133472913, |
|
"grad_norm": 24.549638748168945, |
|
"learning_rate": 6.403047291942057e-08, |
|
"logits/chosen": -2.4225709438323975, |
|
"logits/rejected": -2.3343379497528076, |
|
"logps/chosen": -420.82391357421875, |
|
"logps/rejected": -419.4525451660156, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.250446081161499, |
|
"rewards/margins": 0.4668423533439636, |
|
"rewards/rejected": -1.7172883749008179, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.7929861292855274, |
|
"grad_norm": 34.49531173706055, |
|
"learning_rate": 6.251151717851021e-08, |
|
"logits/chosen": -2.4909310340881348, |
|
"logits/rejected": -2.473926067352295, |
|
"logps/chosen": -417.85321044921875, |
|
"logps/rejected": -428.09283447265625, |
|
"loss": 0.6039, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1568142175674438, |
|
"rewards/margins": 0.43484121561050415, |
|
"rewards/rejected": -1.5916552543640137, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.7956032452237635, |
|
"grad_norm": 27.338661193847656, |
|
"learning_rate": 6.100821667196041e-08, |
|
"logits/chosen": -2.5778112411499023, |
|
"logits/rejected": -2.4326424598693848, |
|
"logps/chosen": -468.60430908203125, |
|
"logps/rejected": -429.7701721191406, |
|
"loss": 0.54, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1660789251327515, |
|
"rewards/margins": 0.5631740093231201, |
|
"rewards/rejected": -1.7292530536651611, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.7982203611619995, |
|
"grad_norm": 25.222888946533203, |
|
"learning_rate": 5.952069692493061e-08, |
|
"logits/chosen": -2.4194066524505615, |
|
"logits/rejected": -2.4164278507232666, |
|
"logps/chosen": -414.73687744140625, |
|
"logps/rejected": -454.0586853027344, |
|
"loss": 0.5665, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1890028715133667, |
|
"rewards/margins": 0.49300870299339294, |
|
"rewards/rejected": -1.682011365890503, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.8008374771002356, |
|
"grad_norm": 32.99183654785156, |
|
"learning_rate": 5.8049082144891794e-08, |
|
"logits/chosen": -2.4552743434906006, |
|
"logits/rejected": -2.459123134613037, |
|
"logps/chosen": -455.1717224121094, |
|
"logps/rejected": -536.97802734375, |
|
"loss": 0.5644, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1822292804718018, |
|
"rewards/margins": 0.548469066619873, |
|
"rewards/rejected": -1.7306982278823853, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.8034545930384716, |
|
"grad_norm": 23.637554168701172, |
|
"learning_rate": 5.659349521125459e-08, |
|
"logits/chosen": -2.556009292602539, |
|
"logits/rejected": -2.5285425186157227, |
|
"logps/chosen": -475.5826110839844, |
|
"logps/rejected": -476.42156982421875, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1752772331237793, |
|
"rewards/margins": 0.3971019685268402, |
|
"rewards/rejected": -1.5723793506622314, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.8060717089767077, |
|
"grad_norm": 34.81662368774414, |
|
"learning_rate": 5.5154057665109e-08, |
|
"logits/chosen": -2.498422145843506, |
|
"logits/rejected": -2.464500665664673, |
|
"logps/chosen": -449.62548828125, |
|
"logps/rejected": -458.1709899902344, |
|
"loss": 0.547, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.151908278465271, |
|
"rewards/margins": 0.5177582502365112, |
|
"rewards/rejected": -1.6696665287017822, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.8086888249149438, |
|
"grad_norm": 24.595640182495117, |
|
"learning_rate": 5.3730889699075853e-08, |
|
"logits/chosen": -2.5110769271850586, |
|
"logits/rejected": -2.4285898208618164, |
|
"logps/chosen": -460.2265625, |
|
"logps/rejected": -442.6695861816406, |
|
"loss": 0.5678, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.075935959815979, |
|
"rewards/margins": 0.49235135316848755, |
|
"rewards/rejected": -1.5682871341705322, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"grad_norm": 35.13508224487305, |
|
"learning_rate": 5.2324110147270893e-08, |
|
"logits/chosen": -2.4877264499664307, |
|
"logits/rejected": -2.455303192138672, |
|
"logps/chosen": -461.565185546875, |
|
"logps/rejected": -469.4137268066406, |
|
"loss": 0.577, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0697096586227417, |
|
"rewards/margins": 0.5005604028701782, |
|
"rewards/rejected": -1.57027006149292, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8113059408531798, |
|
"eval_logits/chosen": -2.453810691833496, |
|
"eval_logits/rejected": -2.4136769771575928, |
|
"eval_logps/chosen": -449.80560302734375, |
|
"eval_logps/rejected": -454.67962646484375, |
|
"eval_loss": 0.5885876417160034, |
|
"eval_rewards/accuracies": 0.684499979019165, |
|
"eval_rewards/chosen": -1.1358660459518433, |
|
"eval_rewards/margins": 0.439881831407547, |
|
"eval_rewards/rejected": -1.5757479667663574, |
|
"eval_runtime": 305.6852, |
|
"eval_samples_per_second": 6.543, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.8139230567914159, |
|
"grad_norm": 24.051559448242188, |
|
"learning_rate": 5.0933836475381795e-08, |
|
"logits/chosen": -2.544729709625244, |
|
"logits/rejected": -2.472533702850342, |
|
"logps/chosen": -466.03924560546875, |
|
"logps/rejected": -479.94140625, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.0298961400985718, |
|
"rewards/margins": 0.48342984914779663, |
|
"rewards/rejected": -1.5133259296417236, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.816540172729652, |
|
"grad_norm": 19.326536178588867, |
|
"learning_rate": 4.956018477086005e-08, |
|
"logits/chosen": -2.479027271270752, |
|
"logits/rejected": -2.427156448364258, |
|
"logps/chosen": -458.11993408203125, |
|
"logps/rejected": -453.17864990234375, |
|
"loss": 0.576, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.121168851852417, |
|
"rewards/margins": 0.4721224904060364, |
|
"rewards/rejected": -1.593291163444519, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.819157288667888, |
|
"grad_norm": 21.694664001464844, |
|
"learning_rate": 4.820326973322763e-08, |
|
"logits/chosen": -2.4353275299072266, |
|
"logits/rejected": -2.3973605632781982, |
|
"logps/chosen": -432.6817321777344, |
|
"logps/rejected": -453.9100646972656, |
|
"loss": 0.5839, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.170310378074646, |
|
"rewards/margins": 0.4695982038974762, |
|
"rewards/rejected": -1.6399085521697998, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.821774404606124, |
|
"grad_norm": 30.601573944091797, |
|
"learning_rate": 4.686320466449981e-08, |
|
"logits/chosen": -2.469275951385498, |
|
"logits/rejected": -2.3796298503875732, |
|
"logps/chosen": -429.75885009765625, |
|
"logps/rejected": -460.8003845214844, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1616802215576172, |
|
"rewards/margins": 0.5735237002372742, |
|
"rewards/rejected": -1.735203742980957, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.8243915205443602, |
|
"grad_norm": 24.408126831054688, |
|
"learning_rate": 4.554010145972417e-08, |
|
"logits/chosen": -2.5520882606506348, |
|
"logits/rejected": -2.478285312652588, |
|
"logps/chosen": -454.3311462402344, |
|
"logps/rejected": -466.89288330078125, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.19753098487854, |
|
"rewards/margins": 0.42739981412887573, |
|
"rewards/rejected": -1.624930739402771, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.8270086364825961, |
|
"grad_norm": 32.94855499267578, |
|
"learning_rate": 4.423407059763745e-08, |
|
"logits/chosen": -2.4835262298583984, |
|
"logits/rejected": -2.424791097640991, |
|
"logps/chosen": -474.9059143066406, |
|
"logps/rejected": -492.48681640625, |
|
"loss": 0.5942, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.1789145469665527, |
|
"rewards/margins": 0.4700239598751068, |
|
"rewards/rejected": -1.648938536643982, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.8296257524208323, |
|
"grad_norm": 23.921096801757812, |
|
"learning_rate": 4.294522113144078e-08, |
|
"logits/chosen": -2.417403221130371, |
|
"logits/rejected": -2.3476955890655518, |
|
"logps/chosen": -451.852783203125, |
|
"logps/rejected": -444.2347717285156, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1180942058563232, |
|
"rewards/margins": 0.5155684351921082, |
|
"rewards/rejected": -1.6336625814437866, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.8322428683590684, |
|
"grad_norm": 26.380470275878906, |
|
"learning_rate": 4.1673660679693804e-08, |
|
"logits/chosen": -2.4901657104492188, |
|
"logits/rejected": -2.497138500213623, |
|
"logps/chosen": -406.10546875, |
|
"logps/rejected": -466.86248779296875, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2110943794250488, |
|
"rewards/margins": 0.4295726716518402, |
|
"rewards/rejected": -1.640667200088501, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.8348599842973043, |
|
"grad_norm": 21.208757400512695, |
|
"learning_rate": 4.041949541732825e-08, |
|
"logits/chosen": -2.4689438343048096, |
|
"logits/rejected": -2.470299482345581, |
|
"logps/chosen": -445.4402770996094, |
|
"logps/rejected": -477.43695068359375, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1410871744155884, |
|
"rewards/margins": 0.5272367596626282, |
|
"rewards/rejected": -1.6683238744735718, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"grad_norm": 25.565385818481445, |
|
"learning_rate": 3.9182830066782605e-08, |
|
"logits/chosen": -2.4609150886535645, |
|
"logits/rejected": -2.456033229827881, |
|
"logps/chosen": -459.481201171875, |
|
"logps/rejected": -505.4794006347656, |
|
"loss": 0.5731, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2549974918365479, |
|
"rewards/margins": 0.49946990609169006, |
|
"rewards/rejected": -1.7544673681259155, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8374771002355405, |
|
"eval_logits/chosen": -2.4400744438171387, |
|
"eval_logits/rejected": -2.3988091945648193, |
|
"eval_logps/chosen": -455.5009460449219, |
|
"eval_logps/rejected": -462.0313415527344, |
|
"eval_loss": 0.586392343044281, |
|
"eval_rewards/accuracies": 0.6899999976158142, |
|
"eval_rewards/chosen": -1.1928194761276245, |
|
"eval_rewards/margins": 0.4564457833766937, |
|
"eval_rewards/rejected": -1.649265170097351, |
|
"eval_runtime": 305.5164, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.8400942161737766, |
|
"grad_norm": 19.77925682067871, |
|
"learning_rate": 3.79637678892577e-08, |
|
"logits/chosen": -2.403917074203491, |
|
"logits/rejected": -2.4254679679870605, |
|
"logps/chosen": -440.84747314453125, |
|
"logps/rejected": -439.8973693847656, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.181836724281311, |
|
"rewards/margins": 0.35303249955177307, |
|
"rewards/rejected": -1.5348690748214722, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.8427113321120125, |
|
"grad_norm": 30.918712615966797, |
|
"learning_rate": 3.6762410676094645e-08, |
|
"logits/chosen": -2.459597110748291, |
|
"logits/rejected": -2.455178737640381, |
|
"logps/chosen": -486.547119140625, |
|
"logps/rejected": -474.1707458496094, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1656169891357422, |
|
"rewards/margins": 0.4497455060482025, |
|
"rewards/rejected": -1.6153624057769775, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.8453284480502486, |
|
"grad_norm": 42.9607048034668, |
|
"learning_rate": 3.557885874027497e-08, |
|
"logits/chosen": -2.3949739933013916, |
|
"logits/rejected": -2.4011003971099854, |
|
"logps/chosen": -452.0868225097656, |
|
"logps/rejected": -458.3661193847656, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.2526524066925049, |
|
"rewards/margins": 0.25641515851020813, |
|
"rewards/rejected": -1.509067416191101, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.8479455639884846, |
|
"grad_norm": 26.522703170776367, |
|
"learning_rate": 3.441321090804469e-08, |
|
"logits/chosen": -2.5475668907165527, |
|
"logits/rejected": -2.501690626144409, |
|
"logps/chosen": -452.525634765625, |
|
"logps/rejected": -438.3544006347656, |
|
"loss": 0.564, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1297898292541504, |
|
"rewards/margins": 0.4648459851741791, |
|
"rewards/rejected": -1.5946358442306519, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.8505626799267207, |
|
"grad_norm": 23.155546188354492, |
|
"learning_rate": 3.326556451066234e-08, |
|
"logits/chosen": -2.5346245765686035, |
|
"logits/rejected": -2.475264310836792, |
|
"logps/chosen": -488.89306640625, |
|
"logps/rejected": -491.93597412109375, |
|
"loss": 0.5619, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.1155110597610474, |
|
"rewards/margins": 0.532517671585083, |
|
"rewards/rejected": -1.6480286121368408, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.8531797958649568, |
|
"grad_norm": 28.358789443969727, |
|
"learning_rate": 3.2136015376271946e-08, |
|
"logits/chosen": -2.4637229442596436, |
|
"logits/rejected": -2.412484645843506, |
|
"logps/chosen": -459.3023376464844, |
|
"logps/rejected": -466.33636474609375, |
|
"loss": 0.6073, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.308656930923462, |
|
"rewards/margins": 0.3834686875343323, |
|
"rewards/rejected": -1.692125678062439, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.8557969118031928, |
|
"grad_norm": 26.66960334777832, |
|
"learning_rate": 3.102465782190106e-08, |
|
"logits/chosen": -2.4675605297088623, |
|
"logits/rejected": -2.4589285850524902, |
|
"logps/chosen": -436.20440673828125, |
|
"logps/rejected": -456.6858825683594, |
|
"loss": 0.5966, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1874547004699707, |
|
"rewards/margins": 0.4859285354614258, |
|
"rewards/rejected": -1.673383116722107, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.8584140277414289, |
|
"grad_norm": 27.6555233001709, |
|
"learning_rate": 2.993158464558565e-08, |
|
"logits/chosen": -2.4608101844787598, |
|
"logits/rejected": -2.4574997425079346, |
|
"logps/chosen": -457.8924255371094, |
|
"logps/rejected": -490.01739501953125, |
|
"loss": 0.5995, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.082580327987671, |
|
"rewards/margins": 0.4137091040611267, |
|
"rewards/rejected": -1.4962894916534424, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.861031143679665, |
|
"grad_norm": 18.215288162231445, |
|
"learning_rate": 2.8856887118621358e-08, |
|
"logits/chosen": -2.506308078765869, |
|
"logits/rejected": -2.5308749675750732, |
|
"logps/chosen": -450.243408203125, |
|
"logps/rejected": -485.89825439453125, |
|
"loss": 0.6124, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.2139848470687866, |
|
"rewards/margins": 0.4676898121833801, |
|
"rewards/rejected": -1.6816747188568115, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"grad_norm": 25.5659122467041, |
|
"learning_rate": 2.7800654977942482e-08, |
|
"logits/chosen": -2.461979627609253, |
|
"logits/rejected": -2.42045259475708, |
|
"logps/chosen": -443.5435485839844, |
|
"logps/rejected": -463.61669921875, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1075669527053833, |
|
"rewards/margins": 0.45719677209854126, |
|
"rewards/rejected": -1.5647637844085693, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.863648259617901, |
|
"eval_logits/chosen": -2.4384121894836426, |
|
"eval_logits/rejected": -2.3969199657440186, |
|
"eval_logps/chosen": -453.6158752441406, |
|
"eval_logps/rejected": -459.41778564453125, |
|
"eval_loss": 0.5864999890327454, |
|
"eval_rewards/accuracies": 0.6894999742507935, |
|
"eval_rewards/chosen": -1.1739686727523804, |
|
"eval_rewards/margins": 0.4491608142852783, |
|
"eval_rewards/rejected": -1.6231294870376587, |
|
"eval_runtime": 306.5951, |
|
"eval_samples_per_second": 6.523, |
|
"eval_steps_per_second": 0.815, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.8662653755561371, |
|
"grad_norm": 31.966367721557617, |
|
"learning_rate": 2.676297641862879e-08, |
|
"logits/chosen": -2.493067741394043, |
|
"logits/rejected": -2.4418578147888184, |
|
"logps/chosen": -404.053466796875, |
|
"logps/rejected": -391.7318420410156, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -1.12334406375885, |
|
"rewards/margins": 0.49404868483543396, |
|
"rewards/rejected": -1.6173927783966064, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.8688824914943732, |
|
"grad_norm": 21.95867156982422, |
|
"learning_rate": 2.5743938086541352e-08, |
|
"logits/chosen": -2.448385715484619, |
|
"logits/rejected": -2.4331841468811035, |
|
"logps/chosen": -451.23193359375, |
|
"logps/rejected": -464.74041748046875, |
|
"loss": 0.5711, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.196354627609253, |
|
"rewards/margins": 0.5040711760520935, |
|
"rewards/rejected": -1.7004257440567017, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.8714996074326092, |
|
"grad_norm": 23.792348861694336, |
|
"learning_rate": 2.474362507108757e-08, |
|
"logits/chosen": -2.5521392822265625, |
|
"logits/rejected": -2.4925191402435303, |
|
"logps/chosen": -461.41961669921875, |
|
"logps/rejected": -468.02801513671875, |
|
"loss": 0.5694, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.082940697669983, |
|
"rewards/margins": 0.5743801593780518, |
|
"rewards/rejected": -1.6573207378387451, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.8741167233708453, |
|
"grad_norm": 27.30091094970703, |
|
"learning_rate": 2.3762120898116495e-08, |
|
"logits/chosen": -2.477776288986206, |
|
"logits/rejected": -2.4457545280456543, |
|
"logps/chosen": -455.5399475097656, |
|
"logps/rejected": -483.2525329589844, |
|
"loss": 0.5674, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1902644634246826, |
|
"rewards/margins": 0.4705522954463959, |
|
"rewards/rejected": -1.6608167886734009, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.8767338393090814, |
|
"grad_norm": 31.050687789916992, |
|
"learning_rate": 2.2799507522944044e-08, |
|
"logits/chosen": -2.408630847930908, |
|
"logits/rejected": -2.3747382164001465, |
|
"logps/chosen": -459.5210876464844, |
|
"logps/rejected": -485.4042053222656, |
|
"loss": 0.5379, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.127478837966919, |
|
"rewards/margins": 0.5589307546615601, |
|
"rewards/rejected": -1.686409592628479, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.8793509552473174, |
|
"grad_norm": 28.34389877319336, |
|
"learning_rate": 2.1855865323510054e-08, |
|
"logits/chosen": -2.4636940956115723, |
|
"logits/rejected": -2.370000123977661, |
|
"logps/chosen": -459.6726989746094, |
|
"logps/rejected": -500.09625244140625, |
|
"loss": 0.5327, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0568628311157227, |
|
"rewards/margins": 0.6852847337722778, |
|
"rewards/rejected": -1.7421478033065796, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.8819680711855535, |
|
"grad_norm": 23.982769012451172, |
|
"learning_rate": 2.0931273093666573e-08, |
|
"logits/chosen": -2.4250173568725586, |
|
"logits/rejected": -2.377990961074829, |
|
"logps/chosen": -429.1495056152344, |
|
"logps/rejected": -440.8812561035156, |
|
"loss": 0.5617, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.227034330368042, |
|
"rewards/margins": 0.48124265670776367, |
|
"rewards/rejected": -1.7082771062850952, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.8845851871237895, |
|
"grad_norm": 27.970073699951172, |
|
"learning_rate": 2.002580803659873e-08, |
|
"logits/chosen": -2.4545071125030518, |
|
"logits/rejected": -2.4068009853363037, |
|
"logps/chosen": -444.27227783203125, |
|
"logps/rejected": -445.6871643066406, |
|
"loss": 0.5925, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1936101913452148, |
|
"rewards/margins": 0.4538310170173645, |
|
"rewards/rejected": -1.6474411487579346, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.8872023030620256, |
|
"grad_norm": 23.618146896362305, |
|
"learning_rate": 1.9139545758378256e-08, |
|
"logits/chosen": -2.4549124240875244, |
|
"logits/rejected": -2.363956928253174, |
|
"logps/chosen": -459.8614807128906, |
|
"logps/rejected": -442.1275939941406, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -1.1489067077636719, |
|
"rewards/margins": 0.5495232343673706, |
|
"rewards/rejected": -1.698429822921753, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"grad_norm": 24.354528427124023, |
|
"learning_rate": 1.8272560261650277e-08, |
|
"logits/chosen": -2.46649432182312, |
|
"logits/rejected": -2.4250054359436035, |
|
"logps/chosen": -503.41552734375, |
|
"logps/rejected": -477.5843200683594, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1212806701660156, |
|
"rewards/margins": 0.5350293517112732, |
|
"rewards/rejected": -1.6563100814819336, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8898194190002617, |
|
"eval_logits/chosen": -2.430605888366699, |
|
"eval_logits/rejected": -2.3882248401641846, |
|
"eval_logps/chosen": -451.9486083984375, |
|
"eval_logps/rejected": -457.9693603515625, |
|
"eval_loss": 0.5859763026237488, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": -1.1572964191436768, |
|
"eval_rewards/margins": 0.4513489007949829, |
|
"eval_rewards/rejected": -1.6086454391479492, |
|
"eval_runtime": 305.5222, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.8924365349384977, |
|
"grad_norm": 23.631261825561523, |
|
"learning_rate": 1.742492393945427e-08, |
|
"logits/chosen": -2.426309108734131, |
|
"logits/rejected": -2.361506938934326, |
|
"logps/chosen": -479.4481506347656, |
|
"logps/rejected": -459.58447265625, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.1870609521865845, |
|
"rewards/margins": 0.4371766149997711, |
|
"rewards/rejected": -1.6242374181747437, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.8950536508767338, |
|
"grad_norm": 28.153467178344727, |
|
"learning_rate": 1.6596707569179302e-08, |
|
"logits/chosen": -2.549783229827881, |
|
"logits/rejected": -2.4772112369537354, |
|
"logps/chosen": -478.44061279296875, |
|
"logps/rejected": -468.6541442871094, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.147237777709961, |
|
"rewards/margins": 0.5185133814811707, |
|
"rewards/rejected": -1.6657512187957764, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.8976707668149699, |
|
"grad_norm": 29.848007202148438, |
|
"learning_rate": 1.5787980306653848e-08, |
|
"logits/chosen": -2.4930880069732666, |
|
"logits/rejected": -2.423302412033081, |
|
"logps/chosen": -463.26300048828125, |
|
"logps/rejected": -475.92767333984375, |
|
"loss": 0.5766, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1223206520080566, |
|
"rewards/margins": 0.49025315046310425, |
|
"rewards/rejected": -1.6125738620758057, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.9002878827532059, |
|
"grad_norm": 27.02195930480957, |
|
"learning_rate": 1.499880968037165e-08, |
|
"logits/chosen": -2.529888868331909, |
|
"logits/rejected": -2.467481851577759, |
|
"logps/chosen": -425.4710388183594, |
|
"logps/rejected": -418.39752197265625, |
|
"loss": 0.5716, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.080725073814392, |
|
"rewards/margins": 0.45771676301956177, |
|
"rewards/rejected": -1.538441777229309, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.902904998691442, |
|
"grad_norm": 37.68179702758789, |
|
"learning_rate": 1.4229261585852803e-08, |
|
"logits/chosen": -2.5055129528045654, |
|
"logits/rejected": -2.483830690383911, |
|
"logps/chosen": -452.56610107421875, |
|
"logps/rejected": -458.8866271972656, |
|
"loss": 0.556, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1098837852478027, |
|
"rewards/margins": 0.5025152564048767, |
|
"rewards/rejected": -1.6123991012573242, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.9055221146296781, |
|
"grad_norm": 26.292009353637695, |
|
"learning_rate": 1.3479400280141883e-08, |
|
"logits/chosen": -2.4547009468078613, |
|
"logits/rejected": -2.469587802886963, |
|
"logps/chosen": -422.94110107421875, |
|
"logps/rejected": -467.0780334472656, |
|
"loss": 0.5599, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.089966058731079, |
|
"rewards/margins": 0.5504525303840637, |
|
"rewards/rejected": -1.6404184103012085, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.9081392305679141, |
|
"grad_norm": 30.572351455688477, |
|
"learning_rate": 1.2749288376442042e-08, |
|
"logits/chosen": -2.461515426635742, |
|
"logits/rejected": -2.381925344467163, |
|
"logps/chosen": -481.184326171875, |
|
"logps/rejected": -467.42510986328125, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1164970397949219, |
|
"rewards/margins": 0.5962284803390503, |
|
"rewards/rejected": -1.712725281715393, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.9107563465061502, |
|
"grad_norm": 20.18982696533203, |
|
"learning_rate": 1.2038986838887127e-08, |
|
"logits/chosen": -2.5067992210388184, |
|
"logits/rejected": -2.4624361991882324, |
|
"logps/chosen": -433.2051696777344, |
|
"logps/rejected": -454.38909912109375, |
|
"loss": 0.6137, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.2155464887619019, |
|
"rewards/margins": 0.42829465866088867, |
|
"rewards/rejected": -1.6438411474227905, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.9133734624443863, |
|
"grad_norm": 23.425052642822266, |
|
"learning_rate": 1.1348554977451131e-08, |
|
"logits/chosen": -2.522127628326416, |
|
"logits/rejected": -2.47310471534729, |
|
"logps/chosen": -474.30712890625, |
|
"logps/rejected": -479.2377014160156, |
|
"loss": 0.5578, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1554954051971436, |
|
"rewards/margins": 0.574936032295227, |
|
"rewards/rejected": -1.730431318283081, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"grad_norm": 20.377241134643555, |
|
"learning_rate": 1.06780504429958e-08, |
|
"logits/chosen": -2.4527220726013184, |
|
"logits/rejected": -2.367492198944092, |
|
"logps/chosen": -457.9435119628906, |
|
"logps/rejected": -445.28009033203125, |
|
"loss": 0.6059, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1457488536834717, |
|
"rewards/margins": 0.4370895028114319, |
|
"rewards/rejected": -1.5828382968902588, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9159905783826223, |
|
"eval_logits/chosen": -2.4319987297058105, |
|
"eval_logits/rejected": -2.389674663543701, |
|
"eval_logps/chosen": -452.93878173828125, |
|
"eval_logps/rejected": -459.2307434082031, |
|
"eval_loss": 0.5858200788497925, |
|
"eval_rewards/accuracies": 0.6890000104904175, |
|
"eval_rewards/chosen": -1.1671984195709229, |
|
"eval_rewards/margins": 0.4540611207485199, |
|
"eval_rewards/rejected": -1.6212595701217651, |
|
"eval_runtime": 305.662, |
|
"eval_samples_per_second": 6.543, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.9186076943208584, |
|
"grad_norm": 20.939462661743164, |
|
"learning_rate": 1.0027529222456754e-08, |
|
"logits/chosen": -2.44631028175354, |
|
"logits/rejected": -2.397378921508789, |
|
"logps/chosen": -429.8016052246094, |
|
"logps/rejected": -454.264404296875, |
|
"loss": 0.519, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.0528876781463623, |
|
"rewards/margins": 0.5599857568740845, |
|
"rewards/rejected": -1.6128734350204468, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.9212248102590945, |
|
"grad_norm": 20.19708251953125, |
|
"learning_rate": 9.397045634168766e-09, |
|
"logits/chosen": -2.5315864086151123, |
|
"logits/rejected": -2.5124194622039795, |
|
"logps/chosen": -454.331787109375, |
|
"logps/rejected": -493.25848388671875, |
|
"loss": 0.5627, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -1.0776004791259766, |
|
"rewards/margins": 0.5831270217895508, |
|
"rewards/rejected": -1.6607275009155273, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.9238419261973305, |
|
"grad_norm": 21.342370986938477, |
|
"learning_rate": 8.78665232332998e-09, |
|
"logits/chosen": -2.4148471355438232, |
|
"logits/rejected": -2.374274730682373, |
|
"logps/chosen": -415.21966552734375, |
|
"logps/rejected": -443.37713623046875, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.192144751548767, |
|
"rewards/margins": 0.43268775939941406, |
|
"rewards/rejected": -1.6248325109481812, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.9264590421355666, |
|
"grad_norm": 24.934450149536133, |
|
"learning_rate": 8.196400257606206e-09, |
|
"logits/chosen": -2.5181639194488525, |
|
"logits/rejected": -2.4744253158569336, |
|
"logps/chosen": -481.7230529785156, |
|
"logps/rejected": -507.2533264160156, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1774014234542847, |
|
"rewards/margins": 0.5159769058227539, |
|
"rewards/rejected": -1.693378210067749, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.9290761580738026, |
|
"grad_norm": 22.040672302246094, |
|
"learning_rate": 7.626338722875075e-09, |
|
"logits/chosen": -2.460214853286743, |
|
"logits/rejected": -2.4767918586730957, |
|
"logps/chosen": -436.06005859375, |
|
"logps/rejected": -465.63641357421875, |
|
"loss": 0.5773, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1084725856781006, |
|
"rewards/margins": 0.45712822675704956, |
|
"rewards/rejected": -1.565600872039795, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.9316932740120387, |
|
"grad_norm": 24.051870346069336, |
|
"learning_rate": 7.0765153191106875e-09, |
|
"logits/chosen": -2.4851746559143066, |
|
"logits/rejected": -2.4584531784057617, |
|
"logps/chosen": -439.662109375, |
|
"logps/rejected": -432.596435546875, |
|
"loss": 0.5813, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.1396434307098389, |
|
"rewards/margins": 0.5287774801254272, |
|
"rewards/rejected": -1.6684210300445557, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.9343103899502748, |
|
"grad_norm": 26.50396156311035, |
|
"learning_rate": 6.54697595640899e-09, |
|
"logits/chosen": -2.498539447784424, |
|
"logits/rejected": -2.44036865234375, |
|
"logps/chosen": -478.49664306640625, |
|
"logps/rejected": -492.61224365234375, |
|
"loss": 0.5445, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0882660150527954, |
|
"rewards/margins": 0.5571417212486267, |
|
"rewards/rejected": -1.6454076766967773, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.9369275058885108, |
|
"grad_norm": 20.330581665039062, |
|
"learning_rate": 6.037764851154425e-09, |
|
"logits/chosen": -2.4708151817321777, |
|
"logits/rejected": -2.447495698928833, |
|
"logps/chosen": -450.46099853515625, |
|
"logps/rejected": -485.2198791503906, |
|
"loss": 0.5668, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -1.0887110233306885, |
|
"rewards/margins": 0.5052274465560913, |
|
"rewards/rejected": -1.5939384698867798, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.9395446218267469, |
|
"grad_norm": 26.653793334960938, |
|
"learning_rate": 5.548924522327747e-09, |
|
"logits/chosen": -2.44421648979187, |
|
"logits/rejected": -2.406181573867798, |
|
"logps/chosen": -457.9331970214844, |
|
"logps/rejected": -471.136474609375, |
|
"loss": 0.5838, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2172365188598633, |
|
"rewards/margins": 0.4692623019218445, |
|
"rewards/rejected": -1.6864988803863525, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"grad_norm": 26.454694747924805, |
|
"learning_rate": 5.080495787955691e-09, |
|
"logits/chosen": -2.4167091846466064, |
|
"logits/rejected": -2.414551019668579, |
|
"logps/chosen": -391.5062561035156, |
|
"logps/rejected": -434.9417419433594, |
|
"loss": 0.5703, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -1.0791380405426025, |
|
"rewards/margins": 0.4439873695373535, |
|
"rewards/rejected": -1.523125410079956, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.942161737764983, |
|
"eval_logits/chosen": -2.431995391845703, |
|
"eval_logits/rejected": -2.389662981033325, |
|
"eval_logps/chosen": -452.2864990234375, |
|
"eval_logps/rejected": -458.4890441894531, |
|
"eval_loss": 0.5860488414764404, |
|
"eval_rewards/accuracies": 0.6869999766349792, |
|
"eval_rewards/chosen": -1.1606749296188354, |
|
"eval_rewards/margins": 0.45316699147224426, |
|
"eval_rewards/rejected": -1.6138420104980469, |
|
"eval_runtime": 311.4284, |
|
"eval_samples_per_second": 6.422, |
|
"eval_steps_per_second": 0.803, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.944778853703219, |
|
"grad_norm": 27.942058563232422, |
|
"learning_rate": 4.632517761702814e-09, |
|
"logits/chosen": -2.4297971725463867, |
|
"logits/rejected": -2.3728883266448975, |
|
"logps/chosen": -415.197998046875, |
|
"logps/rejected": -435.744384765625, |
|
"loss": 0.5755, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1534807682037354, |
|
"rewards/margins": 0.4942702651023865, |
|
"rewards/rejected": -1.6477508544921875, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.9473959696414551, |
|
"grad_norm": 34.84771728515625, |
|
"learning_rate": 4.205027849605358e-09, |
|
"logits/chosen": -2.46714448928833, |
|
"logits/rejected": -2.431912899017334, |
|
"logps/chosen": -434.2803649902344, |
|
"logps/rejected": -427.74462890625, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -1.1966065168380737, |
|
"rewards/margins": 0.39964643120765686, |
|
"rewards/rejected": -1.5962530374526978, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.9500130855796912, |
|
"grad_norm": 24.179773330688477, |
|
"learning_rate": 3.798061746947995e-09, |
|
"logits/chosen": -2.5451560020446777, |
|
"logits/rejected": -2.5169193744659424, |
|
"logps/chosen": -442.5852966308594, |
|
"logps/rejected": -437.0686950683594, |
|
"loss": 0.5855, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.171771764755249, |
|
"rewards/margins": 0.4499340057373047, |
|
"rewards/rejected": -1.6217056512832642, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.9526302015179272, |
|
"grad_norm": 41.15779113769531, |
|
"learning_rate": 3.411653435283157e-09, |
|
"logits/chosen": -2.4626052379608154, |
|
"logits/rejected": -2.4032933712005615, |
|
"logps/chosen": -461.59130859375, |
|
"logps/rejected": -428.04669189453125, |
|
"loss": 0.5857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1210428476333618, |
|
"rewards/margins": 0.4643692076206207, |
|
"rewards/rejected": -1.5854119062423706, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.9552473174561633, |
|
"grad_norm": 21.085739135742188, |
|
"learning_rate": 3.0458351795936698e-09, |
|
"logits/chosen": -2.5102531909942627, |
|
"logits/rejected": -2.448899745941162, |
|
"logps/chosen": -427.3299865722656, |
|
"logps/rejected": -434.32305908203125, |
|
"loss": 0.5532, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0826635360717773, |
|
"rewards/margins": 0.5397425889968872, |
|
"rewards/rejected": -1.622406005859375, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.9578644333943994, |
|
"grad_norm": 26.300575256347656, |
|
"learning_rate": 2.700637525598598e-09, |
|
"logits/chosen": -2.455357074737549, |
|
"logits/rejected": -2.4355976581573486, |
|
"logps/chosen": -460.82080078125, |
|
"logps/rejected": -478.3233337402344, |
|
"loss": 0.6174, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -1.1832480430603027, |
|
"rewards/margins": 0.3413129150867462, |
|
"rewards/rejected": -1.5245609283447266, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.9604815493326354, |
|
"grad_norm": 24.9046688079834, |
|
"learning_rate": 2.3760892972027324e-09, |
|
"logits/chosen": -2.538999557495117, |
|
"logits/rejected": -2.4709739685058594, |
|
"logps/chosen": -467.25177001953125, |
|
"logps/rejected": -474.575927734375, |
|
"loss": 0.5588, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.215664267539978, |
|
"rewards/margins": 0.5684916973114014, |
|
"rewards/rejected": -1.7841558456420898, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.9630986652708715, |
|
"grad_norm": 30.135499954223633, |
|
"learning_rate": 2.0722175940897645e-09, |
|
"logits/chosen": -2.434136152267456, |
|
"logits/rejected": -2.447706937789917, |
|
"logps/chosen": -442.7950134277344, |
|
"logps/rejected": -483.507568359375, |
|
"loss": 0.5304, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.1261581182479858, |
|
"rewards/margins": 0.5885075926780701, |
|
"rewards/rejected": -1.7146657705307007, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.9657157812091076, |
|
"grad_norm": 21.930707931518555, |
|
"learning_rate": 1.7890477894593748e-09, |
|
"logits/chosen": -2.477616310119629, |
|
"logits/rejected": -2.418490409851074, |
|
"logps/chosen": -505.480712890625, |
|
"logps/rejected": -485.1387634277344, |
|
"loss": 0.5525, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0282517671585083, |
|
"rewards/margins": 0.5708995461463928, |
|
"rewards/rejected": -1.5991512537002563, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"grad_norm": 25.627620697021484, |
|
"learning_rate": 1.5266035279088708e-09, |
|
"logits/chosen": -2.3665719032287598, |
|
"logits/rejected": -2.340869426727295, |
|
"logps/chosen": -492.96484375, |
|
"logps/rejected": -500.45941162109375, |
|
"loss": 0.5533, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.2112007141113281, |
|
"rewards/margins": 0.4987810254096985, |
|
"rewards/rejected": -1.7099816799163818, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9683328971473436, |
|
"eval_logits/chosen": -2.4303910732269287, |
|
"eval_logits/rejected": -2.388193368911743, |
|
"eval_logps/chosen": -452.4510498046875, |
|
"eval_logps/rejected": -458.71649169921875, |
|
"eval_loss": 0.5858403444290161, |
|
"eval_rewards/accuracies": 0.6880000233650208, |
|
"eval_rewards/chosen": -1.162320613861084, |
|
"eval_rewards/margins": 0.4537965655326843, |
|
"eval_rewards/rejected": -1.616117000579834, |
|
"eval_runtime": 305.8027, |
|
"eval_samples_per_second": 6.54, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.9709500130855797, |
|
"grad_norm": 37.73438262939453, |
|
"learning_rate": 1.2849067234584621e-09, |
|
"logits/chosen": -2.3928847312927246, |
|
"logits/rejected": -2.3548622131347656, |
|
"logps/chosen": -423.58935546875, |
|
"logps/rejected": -446.5189514160156, |
|
"loss": 0.6168, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.152722954750061, |
|
"rewards/margins": 0.4318475127220154, |
|
"rewards/rejected": -1.5845705270767212, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.9735671290238157, |
|
"grad_norm": 27.73121452331543, |
|
"learning_rate": 1.0639775577218625e-09, |
|
"logits/chosen": -2.4332590103149414, |
|
"logits/rejected": -2.3678956031799316, |
|
"logps/chosen": -437.5235290527344, |
|
"logps/rejected": -427.43280029296875, |
|
"loss": 0.5611, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.154651403427124, |
|
"rewards/margins": 0.5416392683982849, |
|
"rewards/rejected": -1.6962906122207642, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.9761842449620518, |
|
"grad_norm": 24.22023582458496, |
|
"learning_rate": 8.638344782207485e-10, |
|
"logits/chosen": -2.434138059616089, |
|
"logits/rejected": -2.3985061645507812, |
|
"logps/chosen": -426.9537048339844, |
|
"logps/rejected": -440.77227783203125, |
|
"loss": 0.5597, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.109956979751587, |
|
"rewards/margins": 0.5067628026008606, |
|
"rewards/rejected": -1.6167194843292236, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.9788013609002879, |
|
"grad_norm": 25.113866806030273, |
|
"learning_rate": 6.844941968447149e-10, |
|
"logits/chosen": -2.4938652515411377, |
|
"logits/rejected": -2.44439959526062, |
|
"logps/chosen": -465.15985107421875, |
|
"logps/rejected": -483.47088623046875, |
|
"loss": 0.5536, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1278215646743774, |
|
"rewards/margins": 0.5776745676994324, |
|
"rewards/rejected": -1.705496072769165, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.9814184768385239, |
|
"grad_norm": 22.66619873046875, |
|
"learning_rate": 5.25971688455612e-10, |
|
"logits/chosen": -2.512327194213867, |
|
"logits/rejected": -2.433387279510498, |
|
"logps/chosen": -449.586669921875, |
|
"logps/rejected": -469.92919921875, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -1.1257078647613525, |
|
"rewards/margins": 0.5826985239982605, |
|
"rewards/rejected": -1.7084062099456787, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.98403559277676, |
|
"grad_norm": 21.942731857299805, |
|
"learning_rate": 3.882801896372967e-10, |
|
"logits/chosen": -2.4940876960754395, |
|
"logits/rejected": -2.446882963180542, |
|
"logps/chosen": -456.06353759765625, |
|
"logps/rejected": -445.09375, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.183485746383667, |
|
"rewards/margins": 0.37569838762283325, |
|
"rewards/rejected": -1.559183955192566, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.9866527087149961, |
|
"grad_norm": 31.122547149658203, |
|
"learning_rate": 2.714311975902661e-10, |
|
"logits/chosen": -2.468987226486206, |
|
"logits/rejected": -2.39099383354187, |
|
"logps/chosen": -463.2923889160156, |
|
"logps/rejected": -476.58935546875, |
|
"loss": 0.5116, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -1.0611310005187988, |
|
"rewards/margins": 0.5779252648353577, |
|
"rewards/rejected": -1.6390562057495117, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.9892698246532321, |
|
"grad_norm": 27.4237060546875, |
|
"learning_rate": 1.754344691717591e-10, |
|
"logits/chosen": -2.44380521774292, |
|
"logits/rejected": -2.4307875633239746, |
|
"logps/chosen": -434.5845642089844, |
|
"logps/rejected": -482.71759033203125, |
|
"loss": 0.6139, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -1.1346935033798218, |
|
"rewards/margins": 0.3297516703605652, |
|
"rewards/rejected": -1.4644451141357422, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.9918869405914682, |
|
"grad_norm": 31.661060333251953, |
|
"learning_rate": 1.0029802008096333e-10, |
|
"logits/chosen": -2.47269868850708, |
|
"logits/rejected": -2.401909112930298, |
|
"logps/chosen": -473.67608642578125, |
|
"logps/rejected": -482.57501220703125, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.16953444480896, |
|
"rewards/margins": 0.5178566575050354, |
|
"rewards/rejected": -1.6873910427093506, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"grad_norm": 28.650169372558594, |
|
"learning_rate": 4.602812418974533e-11, |
|
"logits/chosen": -2.5178725719451904, |
|
"logits/rejected": -2.4773991107940674, |
|
"logps/chosen": -471.7525939941406, |
|
"logps/rejected": -475.13055419921875, |
|
"loss": 0.5988, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.1166661977767944, |
|
"rewards/margins": 0.4814838469028473, |
|
"rewards/rejected": -1.5981502532958984, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9945040565297043, |
|
"eval_logits/chosen": -2.4306020736694336, |
|
"eval_logits/rejected": -2.3882100582122803, |
|
"eval_logps/chosen": -452.2973327636719, |
|
"eval_logps/rejected": -458.4822998046875, |
|
"eval_loss": 0.5861949324607849, |
|
"eval_rewards/accuracies": 0.6884999871253967, |
|
"eval_rewards/chosen": -1.1607835292816162, |
|
"eval_rewards/margins": 0.45299115777015686, |
|
"eval_rewards/rejected": -1.6137746572494507, |
|
"eval_runtime": 305.6937, |
|
"eval_samples_per_second": 6.542, |
|
"eval_steps_per_second": 0.818, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.9971211724679403, |
|
"grad_norm": 29.108121871948242, |
|
"learning_rate": 1.2629313018819309e-11, |
|
"logits/chosen": -2.446488857269287, |
|
"logits/rejected": -2.4168055057525635, |
|
"logps/chosen": -427.93896484375, |
|
"logps/rejected": -450.84332275390625, |
|
"loss": 0.5569, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1016128063201904, |
|
"rewards/margins": 0.558451771736145, |
|
"rewards/rejected": -1.6600643396377563, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.9997382884061764, |
|
"grad_norm": 23.65038299560547, |
|
"learning_rate": 1.0437535929996855e-13, |
|
"logits/chosen": -2.4572625160217285, |
|
"logits/rejected": -2.4045023918151855, |
|
"logps/chosen": -472.81048583984375, |
|
"logps/rejected": -466.09051513671875, |
|
"loss": 0.5511, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -1.1336238384246826, |
|
"rewards/margins": 0.5550218820571899, |
|
"rewards/rejected": -1.6886459589004517, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3821, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6060711596530634, |
|
"train_runtime": 35916.4658, |
|
"train_samples_per_second": 1.702, |
|
"train_steps_per_second": 0.106 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3821, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|