|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.2616431187859759, |
|
"eval_steps": 500, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 25.513092629771464, |
|
"kl": 0.0, |
|
"learning_rate": 4.99738356881214e-07, |
|
"logps/chosen": -255.54945373535156, |
|
"logps/rejected": -223.1166229248047, |
|
"loss": 0.2812, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 19.05975346317889, |
|
"kl": 0.0, |
|
"learning_rate": 4.99476713762428e-07, |
|
"logps/chosen": -284.558837890625, |
|
"logps/rejected": -214.7106475830078, |
|
"loss": 0.2015, |
|
"rewards/chosen": 0.34634318947792053, |
|
"rewards/margins": 0.3400447368621826, |
|
"rewards/rejected": 0.006298447493463755, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.41510277772972, |
|
"kl": 2.259248733520508, |
|
"learning_rate": 4.992150706436421e-07, |
|
"logps/chosen": -191.8242645263672, |
|
"logps/rejected": -164.48486328125, |
|
"loss": 0.2416, |
|
"rewards/chosen": 0.3580303192138672, |
|
"rewards/margins": 0.17033512890338898, |
|
"rewards/rejected": 0.1876951903104782, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 18.59921957448892, |
|
"kl": 1.6011241674423218, |
|
"learning_rate": 4.989534275248561e-07, |
|
"logps/chosen": -191.90174865722656, |
|
"logps/rejected": -228.51425170898438, |
|
"loss": 0.2344, |
|
"rewards/chosen": 0.14850404858589172, |
|
"rewards/margins": 0.18605005741119385, |
|
"rewards/rejected": -0.03754601255059242, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 19.28790409947415, |
|
"kl": 0.6965503692626953, |
|
"learning_rate": 4.986917844060701e-07, |
|
"logps/chosen": -213.99278259277344, |
|
"logps/rejected": -244.74432373046875, |
|
"loss": 0.2692, |
|
"rewards/chosen": 0.08441495150327682, |
|
"rewards/margins": -0.010940708220005035, |
|
"rewards/rejected": 0.09535565972328186, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.851732715209323, |
|
"kl": 0.0, |
|
"learning_rate": 4.984301412872841e-07, |
|
"logps/chosen": -235.20309448242188, |
|
"logps/rejected": -261.23590087890625, |
|
"loss": 0.1815, |
|
"rewards/chosen": 0.035614676773548126, |
|
"rewards/margins": 0.42004701495170593, |
|
"rewards/rejected": -0.3844323456287384, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 16.58819557091471, |
|
"kl": 0.0, |
|
"learning_rate": 4.981684981684982e-07, |
|
"logps/chosen": -193.42202758789062, |
|
"logps/rejected": -225.8984832763672, |
|
"loss": 0.1914, |
|
"rewards/chosen": -0.15666261315345764, |
|
"rewards/margins": 0.47593703866004944, |
|
"rewards/rejected": -0.6325996518135071, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 15.607759793866348, |
|
"kl": 0.0, |
|
"learning_rate": 4.979068550497122e-07, |
|
"logps/chosen": -246.3267059326172, |
|
"logps/rejected": -197.67164611816406, |
|
"loss": 0.1615, |
|
"rewards/chosen": 0.23949605226516724, |
|
"rewards/margins": 0.5585669279098511, |
|
"rewards/rejected": -0.3190709054470062, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 17.644608030177945, |
|
"kl": 0.0, |
|
"learning_rate": 4.976452119309262e-07, |
|
"logps/chosen": -270.65924072265625, |
|
"logps/rejected": -205.1195068359375, |
|
"loss": 0.2314, |
|
"rewards/chosen": -0.07605857402086258, |
|
"rewards/margins": 0.15047526359558105, |
|
"rewards/rejected": -0.22653384506702423, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.618428473287977, |
|
"kl": 0.0, |
|
"learning_rate": 4.973835688121403e-07, |
|
"logps/chosen": -174.2410888671875, |
|
"logps/rejected": -181.774169921875, |
|
"loss": 0.2923, |
|
"rewards/chosen": -0.08562064915895462, |
|
"rewards/margins": 0.02823999524116516, |
|
"rewards/rejected": -0.11386064440011978, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 17.243866067758734, |
|
"kl": 0.0, |
|
"learning_rate": 4.971219256933543e-07, |
|
"logps/chosen": -198.24940490722656, |
|
"logps/rejected": -215.78672790527344, |
|
"loss": 0.2918, |
|
"rewards/chosen": 0.13005053997039795, |
|
"rewards/margins": 0.43379873037338257, |
|
"rewards/rejected": -0.3037481904029846, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 15.745968199362501, |
|
"kl": 0.0, |
|
"learning_rate": 4.968602825745683e-07, |
|
"logps/chosen": -205.6284637451172, |
|
"logps/rejected": -214.56349182128906, |
|
"loss": 0.2833, |
|
"rewards/chosen": -0.3640637993812561, |
|
"rewards/margins": 0.07524752616882324, |
|
"rewards/rejected": -0.43931132555007935, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 15.030324414091437, |
|
"kl": 0.2821946144104004, |
|
"learning_rate": 4.965986394557823e-07, |
|
"logps/chosen": -286.0364990234375, |
|
"logps/rejected": -206.61985778808594, |
|
"loss": 0.1812, |
|
"rewards/chosen": 0.5389153361320496, |
|
"rewards/margins": 0.35903000831604004, |
|
"rewards/rejected": 0.17988532781600952, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.769570066708727, |
|
"kl": 0.0, |
|
"learning_rate": 4.963369963369964e-07, |
|
"logps/chosen": -198.4276580810547, |
|
"logps/rejected": -259.9801940917969, |
|
"loss": 0.2128, |
|
"rewards/chosen": 0.21227878332138062, |
|
"rewards/margins": 0.3703112006187439, |
|
"rewards/rejected": -0.1580324023962021, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 17.041145148374916, |
|
"kl": 1.2743568420410156, |
|
"learning_rate": 4.960753532182104e-07, |
|
"logps/chosen": -229.1648712158203, |
|
"logps/rejected": -253.59913635253906, |
|
"loss": 0.2295, |
|
"rewards/chosen": 0.6795628070831299, |
|
"rewards/margins": 0.9129478335380554, |
|
"rewards/rejected": -0.23338502645492554, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 15.28869972325163, |
|
"kl": 0.0, |
|
"learning_rate": 4.958137100994244e-07, |
|
"logps/chosen": -187.63201904296875, |
|
"logps/rejected": -241.87457275390625, |
|
"loss": 0.1656, |
|
"rewards/chosen": 0.1542114019393921, |
|
"rewards/margins": 0.32588493824005127, |
|
"rewards/rejected": -0.17167352139949799, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.352437946436485, |
|
"kl": 0.0, |
|
"learning_rate": 4.955520669806384e-07, |
|
"logps/chosen": -285.8620910644531, |
|
"logps/rejected": -275.36328125, |
|
"loss": 0.229, |
|
"rewards/chosen": 0.02602279745042324, |
|
"rewards/margins": 0.5908135175704956, |
|
"rewards/rejected": -0.5647907257080078, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.496172300986403, |
|
"kl": 0.0, |
|
"learning_rate": 4.952904238618525e-07, |
|
"logps/chosen": -236.38160705566406, |
|
"logps/rejected": -206.48521423339844, |
|
"loss": 0.127, |
|
"rewards/chosen": 0.3640650808811188, |
|
"rewards/margins": 1.3152188062667847, |
|
"rewards/rejected": -0.9511536955833435, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 15.755801807248107, |
|
"kl": 0.0, |
|
"learning_rate": 4.950287807430665e-07, |
|
"logps/chosen": -261.5793151855469, |
|
"logps/rejected": -213.00888061523438, |
|
"loss": 0.2456, |
|
"rewards/chosen": 0.2305096834897995, |
|
"rewards/margins": 1.4204968214035034, |
|
"rewards/rejected": -1.1899871826171875, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.150683957656698, |
|
"kl": 0.0, |
|
"learning_rate": 4.947671376242805e-07, |
|
"logps/chosen": -250.30348205566406, |
|
"logps/rejected": -237.50193786621094, |
|
"loss": 0.184, |
|
"rewards/chosen": -0.09291169792413712, |
|
"rewards/margins": 0.669346809387207, |
|
"rewards/rejected": -0.7622585296630859, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 16.71784084154209, |
|
"kl": 0.0, |
|
"learning_rate": 4.945054945054945e-07, |
|
"logps/chosen": -250.10418701171875, |
|
"logps/rejected": -258.0350341796875, |
|
"loss": 0.2064, |
|
"rewards/chosen": -0.44092392921447754, |
|
"rewards/margins": 0.4753909707069397, |
|
"rewards/rejected": -0.9163148999214172, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.027074569038245, |
|
"kl": 0.0, |
|
"learning_rate": 4.942438513867084e-07, |
|
"logps/chosen": -238.38027954101562, |
|
"logps/rejected": -237.553955078125, |
|
"loss": 0.2034, |
|
"rewards/chosen": -0.11189079284667969, |
|
"rewards/margins": 1.4180889129638672, |
|
"rewards/rejected": -1.5299797058105469, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.242349649129327, |
|
"kl": 0.0, |
|
"learning_rate": 4.939822082679226e-07, |
|
"logps/chosen": -211.67172241210938, |
|
"logps/rejected": -220.88316345214844, |
|
"loss": 0.1695, |
|
"rewards/chosen": -0.0019831450190395117, |
|
"rewards/margins": 1.3524924516677856, |
|
"rewards/rejected": -1.3544756174087524, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.558526366105085, |
|
"kl": 0.0, |
|
"learning_rate": 4.937205651491365e-07, |
|
"logps/chosen": -183.22792053222656, |
|
"logps/rejected": -288.7313537597656, |
|
"loss": 0.3057, |
|
"rewards/chosen": -0.08994026482105255, |
|
"rewards/margins": 1.0271720886230469, |
|
"rewards/rejected": -1.117112398147583, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.612720259085432, |
|
"kl": 0.0, |
|
"learning_rate": 4.934589220303505e-07, |
|
"logps/chosen": -183.35086059570312, |
|
"logps/rejected": -287.0506896972656, |
|
"loss": 0.1631, |
|
"rewards/chosen": -0.2845403850078583, |
|
"rewards/margins": 0.9921867847442627, |
|
"rewards/rejected": -1.2767271995544434, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.1871906272162, |
|
"kl": 0.0, |
|
"learning_rate": 4.931972789115645e-07, |
|
"logps/chosen": -260.46820068359375, |
|
"logps/rejected": -231.28530883789062, |
|
"loss": 0.1819, |
|
"rewards/chosen": -0.6221957802772522, |
|
"rewards/margins": 0.9863255620002747, |
|
"rewards/rejected": -1.6085213422775269, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 12.28801127212002, |
|
"kl": 0.0, |
|
"learning_rate": 4.929356357927786e-07, |
|
"logps/chosen": -218.0127410888672, |
|
"logps/rejected": -236.330078125, |
|
"loss": 0.2623, |
|
"rewards/chosen": -1.2400844097137451, |
|
"rewards/margins": 1.2188379764556885, |
|
"rewards/rejected": -2.4589223861694336, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 13.969382177172214, |
|
"kl": 0.0, |
|
"learning_rate": 4.926739926739926e-07, |
|
"logps/chosen": -262.0902099609375, |
|
"logps/rejected": -233.80523681640625, |
|
"loss": 0.2037, |
|
"rewards/chosen": -1.2043147087097168, |
|
"rewards/margins": 1.4312412738800049, |
|
"rewards/rejected": -2.6355559825897217, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 14.346116577373035, |
|
"kl": 0.0, |
|
"learning_rate": 4.924123495552066e-07, |
|
"logps/chosen": -192.3516387939453, |
|
"logps/rejected": -301.5596923828125, |
|
"loss": 0.2649, |
|
"rewards/chosen": -0.6799588799476624, |
|
"rewards/margins": 0.6100900769233704, |
|
"rewards/rejected": -1.2900489568710327, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 14.447170251019777, |
|
"kl": 0.0, |
|
"learning_rate": 4.921507064364207e-07, |
|
"logps/chosen": -246.61720275878906, |
|
"logps/rejected": -279.7056884765625, |
|
"loss": 0.1786, |
|
"rewards/chosen": -0.16088435053825378, |
|
"rewards/margins": 2.5958094596862793, |
|
"rewards/rejected": -2.7566938400268555, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.638201730563553, |
|
"kl": 0.0, |
|
"learning_rate": 4.918890633176347e-07, |
|
"logps/chosen": -269.7441101074219, |
|
"logps/rejected": -227.7078094482422, |
|
"loss": 0.1939, |
|
"rewards/chosen": 0.17156387865543365, |
|
"rewards/margins": 2.2280192375183105, |
|
"rewards/rejected": -2.056455373764038, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.742789811988482, |
|
"kl": 0.0, |
|
"learning_rate": 4.916274201988487e-07, |
|
"logps/chosen": -276.08001708984375, |
|
"logps/rejected": -285.455078125, |
|
"loss": 0.1504, |
|
"rewards/chosen": -0.9163249135017395, |
|
"rewards/margins": 1.5270984172821045, |
|
"rewards/rejected": -2.443423271179199, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.788909838526948, |
|
"kl": 0.0, |
|
"learning_rate": 4.913657770800627e-07, |
|
"logps/chosen": -205.1074981689453, |
|
"logps/rejected": -295.74462890625, |
|
"loss": 0.225, |
|
"rewards/chosen": -0.3685793876647949, |
|
"rewards/margins": 1.5645514726638794, |
|
"rewards/rejected": -1.9331308603286743, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.82701185590145, |
|
"kl": 0.0, |
|
"learning_rate": 4.911041339612768e-07, |
|
"logps/chosen": -215.7355499267578, |
|
"logps/rejected": -270.3883056640625, |
|
"loss": 0.1812, |
|
"rewards/chosen": -0.46088603138923645, |
|
"rewards/margins": 1.8914861679077148, |
|
"rewards/rejected": -2.352372169494629, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.279622843857423, |
|
"kl": 0.0, |
|
"learning_rate": 4.908424908424908e-07, |
|
"logps/chosen": -229.2891387939453, |
|
"logps/rejected": -190.41712951660156, |
|
"loss": 0.2008, |
|
"rewards/chosen": 0.11880671977996826, |
|
"rewards/margins": 1.337594985961914, |
|
"rewards/rejected": -1.2187882661819458, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.833830578324568, |
|
"kl": 0.0, |
|
"learning_rate": 4.905808477237048e-07, |
|
"logps/chosen": -265.9535827636719, |
|
"logps/rejected": -229.44387817382812, |
|
"loss": 0.2017, |
|
"rewards/chosen": 0.7549086213111877, |
|
"rewards/margins": 2.0256097316741943, |
|
"rewards/rejected": -1.2707011699676514, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.878415132245943, |
|
"kl": 0.0, |
|
"learning_rate": 4.903192046049188e-07, |
|
"logps/chosen": -223.6148681640625, |
|
"logps/rejected": -232.38751220703125, |
|
"loss": 0.1578, |
|
"rewards/chosen": 0.42322105169296265, |
|
"rewards/margins": 1.5377566814422607, |
|
"rewards/rejected": -1.1145355701446533, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.016348202428658, |
|
"kl": 0.0, |
|
"learning_rate": 4.900575614861329e-07, |
|
"logps/chosen": -221.3059844970703, |
|
"logps/rejected": -233.40020751953125, |
|
"loss": 0.153, |
|
"rewards/chosen": 0.6136862635612488, |
|
"rewards/margins": 2.5119822025299072, |
|
"rewards/rejected": -1.8982958793640137, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.696306326828102, |
|
"kl": 0.0, |
|
"learning_rate": 4.897959183673469e-07, |
|
"logps/chosen": -263.5332336425781, |
|
"logps/rejected": -223.24720764160156, |
|
"loss": 0.2326, |
|
"rewards/chosen": -0.20513269305229187, |
|
"rewards/margins": 2.1170685291290283, |
|
"rewards/rejected": -2.3222012519836426, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.797741214492445, |
|
"kl": 0.0, |
|
"learning_rate": 4.895342752485609e-07, |
|
"logps/chosen": -241.70838928222656, |
|
"logps/rejected": -242.095703125, |
|
"loss": 0.2954, |
|
"rewards/chosen": -0.5702841281890869, |
|
"rewards/margins": 0.34226876497268677, |
|
"rewards/rejected": -0.9125528931617737, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.245463459701265, |
|
"kl": 0.0, |
|
"learning_rate": 4.89272632129775e-07, |
|
"logps/chosen": -200.02525329589844, |
|
"logps/rejected": -240.9892578125, |
|
"loss": 0.2966, |
|
"rewards/chosen": -1.1106632947921753, |
|
"rewards/margins": -0.09493815898895264, |
|
"rewards/rejected": -1.0157251358032227, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.419341321902724, |
|
"kl": 0.0, |
|
"learning_rate": 4.890109890109889e-07, |
|
"logps/chosen": -224.20755004882812, |
|
"logps/rejected": -281.16717529296875, |
|
"loss": 0.1364, |
|
"rewards/chosen": -0.4932700991630554, |
|
"rewards/margins": 1.310615062713623, |
|
"rewards/rejected": -1.8038851022720337, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.043427588426145, |
|
"kl": 0.0, |
|
"learning_rate": 4.88749345892203e-07, |
|
"logps/chosen": -233.59933471679688, |
|
"logps/rejected": -260.2566833496094, |
|
"loss": 0.2067, |
|
"rewards/chosen": -0.8917900323867798, |
|
"rewards/margins": 1.0747038125991821, |
|
"rewards/rejected": -1.966493844985962, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 12.44162754838529, |
|
"kl": 0.0, |
|
"learning_rate": 4.88487702773417e-07, |
|
"logps/chosen": -216.97845458984375, |
|
"logps/rejected": -269.1575012207031, |
|
"loss": 0.2059, |
|
"rewards/chosen": 0.21655617654323578, |
|
"rewards/margins": 2.175200939178467, |
|
"rewards/rejected": -1.9586448669433594, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 13.803513332755253, |
|
"kl": 0.0, |
|
"learning_rate": 4.88226059654631e-07, |
|
"logps/chosen": -265.6169128417969, |
|
"logps/rejected": -308.48760986328125, |
|
"loss": 0.1974, |
|
"rewards/chosen": -0.7583664059638977, |
|
"rewards/margins": 1.8766412734985352, |
|
"rewards/rejected": -2.635007619857788, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 13.259849756023506, |
|
"kl": 0.0, |
|
"learning_rate": 4.879644165358451e-07, |
|
"logps/chosen": -216.37884521484375, |
|
"logps/rejected": -263.8242492675781, |
|
"loss": 0.1924, |
|
"rewards/chosen": -0.5627312660217285, |
|
"rewards/margins": 1.7108609676361084, |
|
"rewards/rejected": -2.273592233657837, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 11.401346938362929, |
|
"kl": 0.0, |
|
"learning_rate": 4.877027734170591e-07, |
|
"logps/chosen": -196.87896728515625, |
|
"logps/rejected": -168.52236938476562, |
|
"loss": 0.3164, |
|
"rewards/chosen": -0.26614972949028015, |
|
"rewards/margins": 1.1080857515335083, |
|
"rewards/rejected": -1.3742355108261108, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.108076253718023, |
|
"kl": 0.0, |
|
"learning_rate": 4.874411302982731e-07, |
|
"logps/chosen": -207.7807159423828, |
|
"logps/rejected": -219.1448974609375, |
|
"loss": 0.1305, |
|
"rewards/chosen": -1.0533264875411987, |
|
"rewards/margins": 1.3320115804672241, |
|
"rewards/rejected": -2.385338068008423, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.762036932864307, |
|
"kl": 0.0, |
|
"learning_rate": 4.871794871794871e-07, |
|
"logps/chosen": -236.81471252441406, |
|
"logps/rejected": -228.0085906982422, |
|
"loss": 0.1264, |
|
"rewards/chosen": 0.9120302796363831, |
|
"rewards/margins": 2.634833335876465, |
|
"rewards/rejected": -1.722802996635437, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.066959845009587, |
|
"kl": 0.0, |
|
"learning_rate": 4.869178440607012e-07, |
|
"logps/chosen": -252.11825561523438, |
|
"logps/rejected": -209.72384643554688, |
|
"loss": 0.2249, |
|
"rewards/chosen": -0.5552583336830139, |
|
"rewards/margins": 1.0669043064117432, |
|
"rewards/rejected": -1.6221626996994019, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.898163000668239, |
|
"kl": 0.0, |
|
"learning_rate": 4.866562009419152e-07, |
|
"logps/chosen": -229.1280975341797, |
|
"logps/rejected": -239.55064392089844, |
|
"loss": 0.2366, |
|
"rewards/chosen": -0.3932199478149414, |
|
"rewards/margins": 0.8869756460189819, |
|
"rewards/rejected": -1.2801955938339233, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.53373054095599, |
|
"kl": 0.0, |
|
"learning_rate": 4.863945578231292e-07, |
|
"logps/chosen": -282.8030090332031, |
|
"logps/rejected": -215.99546813964844, |
|
"loss": 0.176, |
|
"rewards/chosen": -0.04715963453054428, |
|
"rewards/margins": 1.483907699584961, |
|
"rewards/rejected": -1.5310673713684082, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.818173532721506, |
|
"kl": 0.0, |
|
"learning_rate": 4.861329147043432e-07, |
|
"logps/chosen": -170.08155822753906, |
|
"logps/rejected": -266.9096984863281, |
|
"loss": 0.196, |
|
"rewards/chosen": -0.43234577775001526, |
|
"rewards/margins": 2.175133466720581, |
|
"rewards/rejected": -2.6074793338775635, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 12.389896501721763, |
|
"kl": 0.0, |
|
"learning_rate": 4.858712715855573e-07, |
|
"logps/chosen": -188.79185485839844, |
|
"logps/rejected": -217.9726104736328, |
|
"loss": 0.1855, |
|
"rewards/chosen": -0.7043643593788147, |
|
"rewards/margins": 1.4354064464569092, |
|
"rewards/rejected": -2.139770746231079, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.719323563675456, |
|
"kl": 0.0, |
|
"learning_rate": 4.856096284667713e-07, |
|
"logps/chosen": -235.02947998046875, |
|
"logps/rejected": -264.5641174316406, |
|
"loss": 0.217, |
|
"rewards/chosen": -1.3881831169128418, |
|
"rewards/margins": 2.237257480621338, |
|
"rewards/rejected": -3.6254405975341797, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.632198403788388, |
|
"kl": 0.0, |
|
"learning_rate": 4.853479853479853e-07, |
|
"logps/chosen": -281.6618347167969, |
|
"logps/rejected": -228.85995483398438, |
|
"loss": 0.0917, |
|
"rewards/chosen": 0.013584318570792675, |
|
"rewards/margins": 3.580209732055664, |
|
"rewards/rejected": -3.5666253566741943, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 12.518341857853112, |
|
"kl": 0.0, |
|
"learning_rate": 4.850863422291994e-07, |
|
"logps/chosen": -159.27957153320312, |
|
"logps/rejected": -251.3924560546875, |
|
"loss": 0.21, |
|
"rewards/chosen": -0.8034593462944031, |
|
"rewards/margins": 1.3652153015136719, |
|
"rewards/rejected": -2.1686747074127197, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 11.773486848179154, |
|
"kl": 0.0, |
|
"learning_rate": 4.848246991104133e-07, |
|
"logps/chosen": -163.08563232421875, |
|
"logps/rejected": -225.0583038330078, |
|
"loss": 0.2323, |
|
"rewards/chosen": -0.784336268901825, |
|
"rewards/margins": 0.35770100355148315, |
|
"rewards/rejected": -1.142037272453308, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.98418104323235, |
|
"kl": 0.0, |
|
"learning_rate": 4.845630559916274e-07, |
|
"logps/chosen": -250.50967407226562, |
|
"logps/rejected": -260.6304016113281, |
|
"loss": 0.1682, |
|
"rewards/chosen": -0.046240393072366714, |
|
"rewards/margins": 2.6554722785949707, |
|
"rewards/rejected": -2.7017126083374023, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 13.45864474945625, |
|
"kl": 0.0, |
|
"learning_rate": 4.843014128728414e-07, |
|
"logps/chosen": -241.01133728027344, |
|
"logps/rejected": -208.72291564941406, |
|
"loss": 0.2261, |
|
"rewards/chosen": -1.1776716709136963, |
|
"rewards/margins": 0.5967569351196289, |
|
"rewards/rejected": -1.7744286060333252, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.967311785129077, |
|
"kl": 0.0, |
|
"learning_rate": 4.840397697540555e-07, |
|
"logps/chosen": -240.189453125, |
|
"logps/rejected": -213.64976501464844, |
|
"loss": 0.2169, |
|
"rewards/chosen": -0.17561621963977814, |
|
"rewards/margins": 2.7882721424102783, |
|
"rewards/rejected": -2.96388840675354, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.93191003753912, |
|
"kl": 0.0, |
|
"learning_rate": 4.837781266352695e-07, |
|
"logps/chosen": -227.45742797851562, |
|
"logps/rejected": -187.86070251464844, |
|
"loss": 0.1054, |
|
"rewards/chosen": -0.3846696615219116, |
|
"rewards/margins": 1.533595085144043, |
|
"rewards/rejected": -1.9182647466659546, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.831720988058269, |
|
"kl": 0.0, |
|
"learning_rate": 4.835164835164835e-07, |
|
"logps/chosen": -273.84375, |
|
"logps/rejected": -213.56747436523438, |
|
"loss": 0.1685, |
|
"rewards/chosen": -0.3652818500995636, |
|
"rewards/margins": 2.15742564201355, |
|
"rewards/rejected": -2.522707462310791, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 12.175263730184788, |
|
"kl": 0.0, |
|
"learning_rate": 4.832548403976975e-07, |
|
"logps/chosen": -244.45236206054688, |
|
"logps/rejected": -183.27474975585938, |
|
"loss": 0.1439, |
|
"rewards/chosen": 0.15830610692501068, |
|
"rewards/margins": 2.1498560905456543, |
|
"rewards/rejected": -1.9915499687194824, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 12.593612516595744, |
|
"kl": 0.0, |
|
"learning_rate": 4.829931972789115e-07, |
|
"logps/chosen": -265.2001953125, |
|
"logps/rejected": -295.2158508300781, |
|
"loss": 0.1956, |
|
"rewards/chosen": -0.9956868886947632, |
|
"rewards/margins": 0.9066202640533447, |
|
"rewards/rejected": -1.902307152748108, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 12.056826564998115, |
|
"kl": 0.0, |
|
"learning_rate": 4.827315541601256e-07, |
|
"logps/chosen": -192.40469360351562, |
|
"logps/rejected": -247.81382751464844, |
|
"loss": 0.178, |
|
"rewards/chosen": -0.7804039120674133, |
|
"rewards/margins": 1.2094306945800781, |
|
"rewards/rejected": -1.9898346662521362, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.384738493954586, |
|
"kl": 0.0, |
|
"learning_rate": 4.824699110413396e-07, |
|
"logps/chosen": -159.72280883789062, |
|
"logps/rejected": -328.75616455078125, |
|
"loss": 0.1926, |
|
"rewards/chosen": -0.9741663932800293, |
|
"rewards/margins": 1.3278543949127197, |
|
"rewards/rejected": -2.302020788192749, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.538951694906014, |
|
"kl": 0.0, |
|
"learning_rate": 4.822082679225536e-07, |
|
"logps/chosen": -254.5558319091797, |
|
"logps/rejected": -190.9063262939453, |
|
"loss": 0.2038, |
|
"rewards/chosen": -0.5719714164733887, |
|
"rewards/margins": 1.5436241626739502, |
|
"rewards/rejected": -2.115595579147339, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 11.328347418708772, |
|
"kl": 0.0, |
|
"learning_rate": 4.819466248037676e-07, |
|
"logps/chosen": -237.08462524414062, |
|
"logps/rejected": -318.36474609375, |
|
"loss": 0.327, |
|
"rewards/chosen": -1.5885767936706543, |
|
"rewards/margins": 1.1602427959442139, |
|
"rewards/rejected": -2.748819589614868, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.400662861104186, |
|
"kl": 0.0, |
|
"learning_rate": 4.816849816849817e-07, |
|
"logps/chosen": -238.31106567382812, |
|
"logps/rejected": -195.43804931640625, |
|
"loss": 0.1737, |
|
"rewards/chosen": -0.6987349987030029, |
|
"rewards/margins": 2.256924629211426, |
|
"rewards/rejected": -2.9556596279144287, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.841610853219839, |
|
"kl": 0.0, |
|
"learning_rate": 4.814233385661957e-07, |
|
"logps/chosen": -204.5384521484375, |
|
"logps/rejected": -208.86331176757812, |
|
"loss": 0.2565, |
|
"rewards/chosen": -1.2488123178482056, |
|
"rewards/margins": 1.1284335851669312, |
|
"rewards/rejected": -2.3772459030151367, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.054533599473265, |
|
"kl": 0.0, |
|
"learning_rate": 4.811616954474097e-07, |
|
"logps/chosen": -250.12615966796875, |
|
"logps/rejected": -243.621337890625, |
|
"loss": 0.1785, |
|
"rewards/chosen": -0.7167716026306152, |
|
"rewards/margins": 1.7554688453674316, |
|
"rewards/rejected": -2.472240447998047, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 11.218204777591135, |
|
"kl": 0.0, |
|
"learning_rate": 4.809000523286237e-07, |
|
"logps/chosen": -202.5470733642578, |
|
"logps/rejected": -251.5446014404297, |
|
"loss": 0.2578, |
|
"rewards/chosen": -0.35193824768066406, |
|
"rewards/margins": 1.2476764917373657, |
|
"rewards/rejected": -1.5996147394180298, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.005076969894088, |
|
"kl": 0.0, |
|
"learning_rate": 4.806384092098378e-07, |
|
"logps/chosen": -240.54025268554688, |
|
"logps/rejected": -238.83575439453125, |
|
"loss": 0.2025, |
|
"rewards/chosen": -1.1610413789749146, |
|
"rewards/margins": 1.6202930212020874, |
|
"rewards/rejected": -2.781334400177002, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.421231936678494, |
|
"kl": 0.0, |
|
"learning_rate": 4.803767660910518e-07, |
|
"logps/chosen": -287.82110595703125, |
|
"logps/rejected": -193.2252960205078, |
|
"loss": 0.2056, |
|
"rewards/chosen": -0.41320356726646423, |
|
"rewards/margins": 2.6422228813171387, |
|
"rewards/rejected": -3.0554263591766357, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 11.604997350152324, |
|
"kl": 0.0, |
|
"learning_rate": 4.801151229722658e-07, |
|
"logps/chosen": -266.6557922363281, |
|
"logps/rejected": -204.9314727783203, |
|
"loss": 0.1635, |
|
"rewards/chosen": -0.5626986622810364, |
|
"rewards/margins": 1.7505519390106201, |
|
"rewards/rejected": -2.3132505416870117, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.167949690406468, |
|
"kl": 0.0, |
|
"learning_rate": 4.798534798534799e-07, |
|
"logps/chosen": -212.5312957763672, |
|
"logps/rejected": -245.55245971679688, |
|
"loss": 0.2015, |
|
"rewards/chosen": -0.6334313750267029, |
|
"rewards/margins": 0.9760074019432068, |
|
"rewards/rejected": -1.6094387769699097, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 13.71229302802406, |
|
"kl": 0.0, |
|
"learning_rate": 4.795918367346938e-07, |
|
"logps/chosen": -248.7874755859375, |
|
"logps/rejected": -254.36085510253906, |
|
"loss": 0.2529, |
|
"rewards/chosen": 0.024569960311055183, |
|
"rewards/margins": 1.5464191436767578, |
|
"rewards/rejected": -1.5218491554260254, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.206901910051194, |
|
"kl": 0.0, |
|
"learning_rate": 4.793301936159079e-07, |
|
"logps/chosen": -214.5361328125, |
|
"logps/rejected": -224.99876403808594, |
|
"loss": 0.1976, |
|
"rewards/chosen": -0.3784463107585907, |
|
"rewards/margins": 2.108283281326294, |
|
"rewards/rejected": -2.486729621887207, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.358475151298732, |
|
"kl": 0.0, |
|
"learning_rate": 4.790685504971219e-07, |
|
"logps/chosen": -227.8341064453125, |
|
"logps/rejected": -244.53817749023438, |
|
"loss": 0.1508, |
|
"rewards/chosen": 0.0641309916973114, |
|
"rewards/margins": 2.4745709896087646, |
|
"rewards/rejected": -2.410439968109131, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.7530961108445, |
|
"kl": 0.0, |
|
"learning_rate": 4.78806907378336e-07, |
|
"logps/chosen": -197.07553100585938, |
|
"logps/rejected": -282.6841735839844, |
|
"loss": 0.2055, |
|
"rewards/chosen": -1.8003284931182861, |
|
"rewards/margins": 0.7180378437042236, |
|
"rewards/rejected": -2.5183663368225098, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 15.543960456447536, |
|
"kl": 0.0, |
|
"learning_rate": 4.7854526425955e-07, |
|
"logps/chosen": -286.0069580078125, |
|
"logps/rejected": -333.1952819824219, |
|
"loss": 0.2215, |
|
"rewards/chosen": -0.9929041862487793, |
|
"rewards/margins": 1.838501214981079, |
|
"rewards/rejected": -2.8314054012298584, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.540856467590372, |
|
"kl": 0.0, |
|
"learning_rate": 4.78283621140764e-07, |
|
"logps/chosen": -250.1410369873047, |
|
"logps/rejected": -234.04039001464844, |
|
"loss": 0.2142, |
|
"rewards/chosen": -1.1462880373001099, |
|
"rewards/margins": 1.3604058027267456, |
|
"rewards/rejected": -2.5066938400268555, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.685162706946018, |
|
"kl": 0.0, |
|
"learning_rate": 4.78021978021978e-07, |
|
"logps/chosen": -244.63720703125, |
|
"logps/rejected": -206.22079467773438, |
|
"loss": 0.2197, |
|
"rewards/chosen": -0.5177630186080933, |
|
"rewards/margins": 2.0680246353149414, |
|
"rewards/rejected": -2.585787534713745, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 10.152423089357503, |
|
"kl": 0.0, |
|
"learning_rate": 4.77760334903192e-07, |
|
"logps/chosen": -208.19064331054688, |
|
"logps/rejected": -250.68597412109375, |
|
"loss": 0.0981, |
|
"rewards/chosen": 0.4294729232788086, |
|
"rewards/margins": 2.881974697113037, |
|
"rewards/rejected": -2.4525017738342285, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 10.497367332273754, |
|
"kl": 0.0, |
|
"learning_rate": 4.774986917844061e-07, |
|
"logps/chosen": -200.9931640625, |
|
"logps/rejected": -224.72264099121094, |
|
"loss": 0.2253, |
|
"rewards/chosen": -0.9502390623092651, |
|
"rewards/margins": 0.7048424482345581, |
|
"rewards/rejected": -1.6550815105438232, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 10.454551429296167, |
|
"kl": 0.0, |
|
"learning_rate": 4.772370486656201e-07, |
|
"logps/chosen": -204.69024658203125, |
|
"logps/rejected": -238.31492614746094, |
|
"loss": 0.1596, |
|
"rewards/chosen": 0.6356328725814819, |
|
"rewards/margins": 2.17765474319458, |
|
"rewards/rejected": -1.5420219898223877, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.947921924106597, |
|
"kl": 0.0, |
|
"learning_rate": 4.769754055468341e-07, |
|
"logps/chosen": -233.5601806640625, |
|
"logps/rejected": -232.72801208496094, |
|
"loss": 0.2242, |
|
"rewards/chosen": 0.4331187307834625, |
|
"rewards/margins": 1.7527743577957153, |
|
"rewards/rejected": -1.3196556568145752, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.796884542926785, |
|
"kl": 0.0, |
|
"learning_rate": 4.767137624280481e-07, |
|
"logps/chosen": -229.7658233642578, |
|
"logps/rejected": -266.162841796875, |
|
"loss": 0.2534, |
|
"rewards/chosen": -0.2590081989765167, |
|
"rewards/margins": 1.418634295463562, |
|
"rewards/rejected": -1.6776424646377563, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 10.31450194916881, |
|
"kl": 0.0, |
|
"learning_rate": 4.764521193092622e-07, |
|
"logps/chosen": -273.9610595703125, |
|
"logps/rejected": -277.6679382324219, |
|
"loss": 0.1663, |
|
"rewards/chosen": -0.03371845558285713, |
|
"rewards/margins": 2.451915740966797, |
|
"rewards/rejected": -2.4856340885162354, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 9.278455988308327, |
|
"kl": 0.0, |
|
"learning_rate": 4.761904761904761e-07, |
|
"logps/chosen": -232.08615112304688, |
|
"logps/rejected": -272.5909118652344, |
|
"loss": 0.184, |
|
"rewards/chosen": -0.7493877410888672, |
|
"rewards/margins": 2.300028085708618, |
|
"rewards/rejected": -3.0494158267974854, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 8.817410071740532, |
|
"kl": 0.0, |
|
"learning_rate": 4.7592883307169017e-07, |
|
"logps/chosen": -222.3486785888672, |
|
"logps/rejected": -248.85317993164062, |
|
"loss": 0.1731, |
|
"rewards/chosen": -0.2696648836135864, |
|
"rewards/margins": 2.0320448875427246, |
|
"rewards/rejected": -2.3017096519470215, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 10.59864769694075, |
|
"kl": 0.0, |
|
"learning_rate": 4.756671899529042e-07, |
|
"logps/chosen": -189.18434143066406, |
|
"logps/rejected": -270.7514343261719, |
|
"loss": 0.1508, |
|
"rewards/chosen": 0.2290552258491516, |
|
"rewards/margins": 3.3581156730651855, |
|
"rewards/rejected": -3.1290605068206787, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.988809402174233, |
|
"kl": 0.0, |
|
"learning_rate": 4.7540554683411827e-07, |
|
"logps/chosen": -276.038330078125, |
|
"logps/rejected": -303.55596923828125, |
|
"loss": 0.191, |
|
"rewards/chosen": -0.4145098030567169, |
|
"rewards/margins": 2.4649319648742676, |
|
"rewards/rejected": -2.879441738128662, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 14.079727761013277, |
|
"kl": 0.0, |
|
"learning_rate": 4.7514390371533227e-07, |
|
"logps/chosen": -257.2515869140625, |
|
"logps/rejected": -244.81591796875, |
|
"loss": 0.2074, |
|
"rewards/chosen": -1.449769139289856, |
|
"rewards/margins": 2.1893882751464844, |
|
"rewards/rejected": -3.63915753364563, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.455104492784045, |
|
"kl": 0.0, |
|
"learning_rate": 4.7488226059654627e-07, |
|
"logps/chosen": -258.3236389160156, |
|
"logps/rejected": -223.65472412109375, |
|
"loss": 0.2373, |
|
"rewards/chosen": -0.474732369184494, |
|
"rewards/margins": 1.6088600158691406, |
|
"rewards/rejected": -2.083592414855957, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.813735596225209, |
|
"kl": 0.0, |
|
"learning_rate": 4.746206174777603e-07, |
|
"logps/chosen": -250.6949005126953, |
|
"logps/rejected": -282.6470031738281, |
|
"loss": 0.2388, |
|
"rewards/chosen": -0.8905995488166809, |
|
"rewards/margins": 1.769092321395874, |
|
"rewards/rejected": -2.65969181060791, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.204915138695135, |
|
"kl": 0.0, |
|
"learning_rate": 4.743589743589743e-07, |
|
"logps/chosen": -176.22486877441406, |
|
"logps/rejected": -309.79229736328125, |
|
"loss": 0.2097, |
|
"rewards/chosen": -1.4110625982284546, |
|
"rewards/margins": 0.993507981300354, |
|
"rewards/rejected": -2.4045705795288086, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.735425350671436, |
|
"kl": 0.0, |
|
"learning_rate": 4.7409733124018836e-07, |
|
"logps/chosen": -224.20626831054688, |
|
"logps/rejected": -246.65956115722656, |
|
"loss": 0.1503, |
|
"rewards/chosen": -0.4009709060192108, |
|
"rewards/margins": 2.9179859161376953, |
|
"rewards/rejected": -3.3189568519592285, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 9.89695125192596, |
|
"kl": 0.0, |
|
"learning_rate": 4.738356881214024e-07, |
|
"logps/chosen": -217.57763671875, |
|
"logps/rejected": -224.3143768310547, |
|
"loss": 0.1092, |
|
"rewards/chosen": 0.3369201123714447, |
|
"rewards/margins": 2.3455708026885986, |
|
"rewards/rejected": -2.008650779724121, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 10.212358128672067, |
|
"kl": 0.0, |
|
"learning_rate": 4.7357404500261636e-07, |
|
"logps/chosen": -197.8075408935547, |
|
"logps/rejected": -229.45677185058594, |
|
"loss": 0.2126, |
|
"rewards/chosen": -0.7042273879051208, |
|
"rewards/margins": 1.3926599025726318, |
|
"rewards/rejected": -2.0968873500823975, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 12.604368131047796, |
|
"kl": 0.0, |
|
"learning_rate": 4.733124018838304e-07, |
|
"logps/chosen": -211.51576232910156, |
|
"logps/rejected": -228.77308654785156, |
|
"loss": 0.2363, |
|
"rewards/chosen": -1.1265863180160522, |
|
"rewards/margins": 0.9696308374404907, |
|
"rewards/rejected": -2.096217155456543, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.118276337061534, |
|
"kl": 0.0, |
|
"learning_rate": 4.7305075876504446e-07, |
|
"logps/chosen": -199.48486328125, |
|
"logps/rejected": -370.7545166015625, |
|
"loss": 0.1847, |
|
"rewards/chosen": -0.5413498282432556, |
|
"rewards/margins": 2.3086533546447754, |
|
"rewards/rejected": -2.850003242492676, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 13.028026388315732, |
|
"kl": 0.0, |
|
"learning_rate": 4.727891156462585e-07, |
|
"logps/chosen": -262.6328430175781, |
|
"logps/rejected": -287.1793518066406, |
|
"loss": 0.2251, |
|
"rewards/chosen": -0.2909429669380188, |
|
"rewards/margins": 2.515258550643921, |
|
"rewards/rejected": -2.806201457977295, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.421812108784723, |
|
"kl": 0.0, |
|
"learning_rate": 4.725274725274725e-07, |
|
"logps/chosen": -234.4532470703125, |
|
"logps/rejected": -272.8806457519531, |
|
"loss": 0.1591, |
|
"rewards/chosen": 0.45129498839378357, |
|
"rewards/margins": 2.3014488220214844, |
|
"rewards/rejected": -1.8501538038253784, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.451980811425736, |
|
"kl": 0.0, |
|
"learning_rate": 4.7226582940868656e-07, |
|
"logps/chosen": -266.4130859375, |
|
"logps/rejected": -256.59039306640625, |
|
"loss": 0.1315, |
|
"rewards/chosen": -0.39178353548049927, |
|
"rewards/margins": 1.9923765659332275, |
|
"rewards/rejected": -2.384160041809082, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.541336180624638, |
|
"kl": 0.0, |
|
"learning_rate": 4.7200418628990056e-07, |
|
"logps/chosen": -253.11045837402344, |
|
"logps/rejected": -257.3990478515625, |
|
"loss": 0.1589, |
|
"rewards/chosen": 0.02405649609863758, |
|
"rewards/margins": 3.012369394302368, |
|
"rewards/rejected": -2.9883129596710205, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.191607092594259, |
|
"kl": 0.0, |
|
"learning_rate": 4.7174254317111455e-07, |
|
"logps/chosen": -219.4788818359375, |
|
"logps/rejected": -251.76841735839844, |
|
"loss": 0.1878, |
|
"rewards/chosen": -0.653347909450531, |
|
"rewards/margins": 1.0217313766479492, |
|
"rewards/rejected": -1.675079345703125, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.465911824092663, |
|
"kl": 0.0, |
|
"learning_rate": 4.714809000523286e-07, |
|
"logps/chosen": -253.79640197753906, |
|
"logps/rejected": -208.65074157714844, |
|
"loss": 0.2581, |
|
"rewards/chosen": -1.6879644393920898, |
|
"rewards/margins": -0.33700597286224365, |
|
"rewards/rejected": -1.3509584665298462, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.000969752280952, |
|
"kl": 0.0, |
|
"learning_rate": 4.7121925693354265e-07, |
|
"logps/chosen": -251.3641815185547, |
|
"logps/rejected": -240.36642456054688, |
|
"loss": 0.1976, |
|
"rewards/chosen": -0.2726837396621704, |
|
"rewards/margins": 2.477322578430176, |
|
"rewards/rejected": -2.7500064373016357, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 14.908402118189745, |
|
"kl": 0.0, |
|
"learning_rate": 4.7095761381475665e-07, |
|
"logps/chosen": -244.45858764648438, |
|
"logps/rejected": -266.1649169921875, |
|
"loss": 0.1959, |
|
"rewards/chosen": -0.5489299893379211, |
|
"rewards/margins": 0.9248166680335999, |
|
"rewards/rejected": -1.473746657371521, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.994589492437557, |
|
"kl": 0.0, |
|
"learning_rate": 4.7069597069597065e-07, |
|
"logps/chosen": -250.3166961669922, |
|
"logps/rejected": -275.6036071777344, |
|
"loss": 0.2578, |
|
"rewards/chosen": -1.3922420740127563, |
|
"rewards/margins": 0.7932580709457397, |
|
"rewards/rejected": -2.185500144958496, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 12.230772963233404, |
|
"kl": 0.0, |
|
"learning_rate": 4.704343275771847e-07, |
|
"logps/chosen": -251.1761474609375, |
|
"logps/rejected": -354.3590393066406, |
|
"loss": 0.1702, |
|
"rewards/chosen": 0.4129093289375305, |
|
"rewards/margins": 1.9088201522827148, |
|
"rewards/rejected": -1.4959107637405396, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.73992280402579, |
|
"kl": 0.0, |
|
"learning_rate": 4.7017268445839875e-07, |
|
"logps/chosen": -185.67906188964844, |
|
"logps/rejected": -188.59017944335938, |
|
"loss": 0.1331, |
|
"rewards/chosen": -0.09853968024253845, |
|
"rewards/margins": 1.8528465032577515, |
|
"rewards/rejected": -1.9513862133026123, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.142692814638496, |
|
"kl": 0.0, |
|
"learning_rate": 4.6991104133961275e-07, |
|
"logps/chosen": -266.6419982910156, |
|
"logps/rejected": -270.0727233886719, |
|
"loss": 0.1801, |
|
"rewards/chosen": -0.9462317824363708, |
|
"rewards/margins": 1.412889003753662, |
|
"rewards/rejected": -2.3591208457946777, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.967127165803278, |
|
"kl": 0.0, |
|
"learning_rate": 4.696493982208268e-07, |
|
"logps/chosen": -247.25096130371094, |
|
"logps/rejected": -212.33914184570312, |
|
"loss": 0.1917, |
|
"rewards/chosen": -1.022446632385254, |
|
"rewards/margins": -0.13641774654388428, |
|
"rewards/rejected": -0.8860288858413696, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 12.672315042849133, |
|
"kl": 0.0, |
|
"learning_rate": 4.693877551020408e-07, |
|
"logps/chosen": -251.9243621826172, |
|
"logps/rejected": -278.6723327636719, |
|
"loss": 0.2143, |
|
"rewards/chosen": -0.1397348791360855, |
|
"rewards/margins": 2.0988736152648926, |
|
"rewards/rejected": -2.2386085987091064, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 9.90707282468987, |
|
"kl": 0.0, |
|
"learning_rate": 4.691261119832548e-07, |
|
"logps/chosen": -198.83712768554688, |
|
"logps/rejected": -277.9150085449219, |
|
"loss": 0.241, |
|
"rewards/chosen": -0.4362916052341461, |
|
"rewards/margins": 2.6319870948791504, |
|
"rewards/rejected": -3.0682787895202637, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 9.980080260830226, |
|
"kl": 0.0, |
|
"learning_rate": 4.6886446886446884e-07, |
|
"logps/chosen": -222.55442810058594, |
|
"logps/rejected": -239.49298095703125, |
|
"loss": 0.1667, |
|
"rewards/chosen": -0.26491254568099976, |
|
"rewards/margins": 2.8245887756347656, |
|
"rewards/rejected": -3.08950138092041, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 14.223816837655765, |
|
"kl": 0.0, |
|
"learning_rate": 4.686028257456829e-07, |
|
"logps/chosen": -229.81675720214844, |
|
"logps/rejected": -248.0295867919922, |
|
"loss": 0.2241, |
|
"rewards/chosen": 0.6647676229476929, |
|
"rewards/margins": 2.933067798614502, |
|
"rewards/rejected": -2.2683000564575195, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.062546156999915, |
|
"kl": 0.0, |
|
"learning_rate": 4.683411826268969e-07, |
|
"logps/chosen": -215.87423706054688, |
|
"logps/rejected": -205.9690399169922, |
|
"loss": 0.221, |
|
"rewards/chosen": -0.38256943225860596, |
|
"rewards/margins": 1.25730562210083, |
|
"rewards/rejected": -1.639875054359436, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 11.71515711397141, |
|
"kl": 0.0, |
|
"learning_rate": 4.680795395081109e-07, |
|
"logps/chosen": -195.1471710205078, |
|
"logps/rejected": -204.15228271484375, |
|
"loss": 0.205, |
|
"rewards/chosen": 0.031107014045119286, |
|
"rewards/margins": 1.4260950088500977, |
|
"rewards/rejected": -1.394987940788269, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.668033463026218, |
|
"kl": 0.0, |
|
"learning_rate": 4.6781789638932494e-07, |
|
"logps/chosen": -238.59262084960938, |
|
"logps/rejected": -244.41387939453125, |
|
"loss": 0.2863, |
|
"rewards/chosen": -0.6594109535217285, |
|
"rewards/margins": 0.7834258079528809, |
|
"rewards/rejected": -1.4428367614746094, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 13.050812790862148, |
|
"kl": 0.0, |
|
"learning_rate": 4.6755625327053893e-07, |
|
"logps/chosen": -208.3390655517578, |
|
"logps/rejected": -246.9788360595703, |
|
"loss": 0.1784, |
|
"rewards/chosen": -0.5635675191879272, |
|
"rewards/margins": 0.9708378314971924, |
|
"rewards/rejected": -1.5344053506851196, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 11.38382856846076, |
|
"kl": 0.0, |
|
"learning_rate": 4.67294610151753e-07, |
|
"logps/chosen": -211.11561584472656, |
|
"logps/rejected": -253.8684539794922, |
|
"loss": 0.1853, |
|
"rewards/chosen": -0.8592585325241089, |
|
"rewards/margins": 1.5102192163467407, |
|
"rewards/rejected": -2.3694777488708496, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 11.616081872243173, |
|
"kl": 0.0, |
|
"learning_rate": 4.6703296703296704e-07, |
|
"logps/chosen": -198.94691467285156, |
|
"logps/rejected": -280.16485595703125, |
|
"loss": 0.1689, |
|
"rewards/chosen": -0.6960778832435608, |
|
"rewards/margins": 2.7315070629119873, |
|
"rewards/rejected": -3.4275848865509033, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 9.393491633545926, |
|
"kl": 0.0, |
|
"learning_rate": 4.6677132391418103e-07, |
|
"logps/chosen": -237.6060333251953, |
|
"logps/rejected": -240.0240020751953, |
|
"loss": 0.2003, |
|
"rewards/chosen": -0.7763846516609192, |
|
"rewards/margins": 1.804600715637207, |
|
"rewards/rejected": -2.5809853076934814, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.566838825942087, |
|
"kl": 0.0, |
|
"learning_rate": 4.6650968079539503e-07, |
|
"logps/chosen": -266.26409912109375, |
|
"logps/rejected": -245.97015380859375, |
|
"loss": 0.179, |
|
"rewards/chosen": -0.013315518386662006, |
|
"rewards/margins": 2.7922699451446533, |
|
"rewards/rejected": -2.8055853843688965, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 14.386798112694665, |
|
"kl": 0.0, |
|
"learning_rate": 4.662480376766091e-07, |
|
"logps/chosen": -259.3221740722656, |
|
"logps/rejected": -280.12982177734375, |
|
"loss": 0.2262, |
|
"rewards/chosen": 0.06482791900634766, |
|
"rewards/margins": 1.3292893171310425, |
|
"rewards/rejected": -1.2644613981246948, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.761953162868792, |
|
"kl": 0.0, |
|
"learning_rate": 4.6598639455782313e-07, |
|
"logps/chosen": -193.20936584472656, |
|
"logps/rejected": -270.4673767089844, |
|
"loss": 0.1212, |
|
"rewards/chosen": -0.4515216648578644, |
|
"rewards/margins": 2.8607840538024902, |
|
"rewards/rejected": -3.3123056888580322, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.751720769362379, |
|
"kl": 0.0, |
|
"learning_rate": 4.6572475143903713e-07, |
|
"logps/chosen": -235.50526428222656, |
|
"logps/rejected": -217.54153442382812, |
|
"loss": 0.2364, |
|
"rewards/chosen": 0.6571396589279175, |
|
"rewards/margins": 2.786506175994873, |
|
"rewards/rejected": -2.129366636276245, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.524940036364148, |
|
"kl": 0.0, |
|
"learning_rate": 4.654631083202512e-07, |
|
"logps/chosen": -200.957275390625, |
|
"logps/rejected": -260.49444580078125, |
|
"loss": 0.1259, |
|
"rewards/chosen": -0.29692986607551575, |
|
"rewards/margins": 2.9654293060302734, |
|
"rewards/rejected": -3.262359142303467, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.735468201198927, |
|
"kl": 0.0, |
|
"learning_rate": 4.652014652014652e-07, |
|
"logps/chosen": -243.689208984375, |
|
"logps/rejected": -259.3601379394531, |
|
"loss": 0.1994, |
|
"rewards/chosen": -0.13563716411590576, |
|
"rewards/margins": 2.890209674835205, |
|
"rewards/rejected": -3.0258467197418213, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 13.921041963760125, |
|
"kl": 0.0, |
|
"learning_rate": 4.6493982208267917e-07, |
|
"logps/chosen": -260.98651123046875, |
|
"logps/rejected": -302.0372314453125, |
|
"loss": 0.2204, |
|
"rewards/chosen": -0.5615333318710327, |
|
"rewards/margins": 1.4565614461898804, |
|
"rewards/rejected": -2.018094778060913, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.886703505939792, |
|
"kl": 0.0, |
|
"learning_rate": 4.646781789638932e-07, |
|
"logps/chosen": -219.5489501953125, |
|
"logps/rejected": -222.55165100097656, |
|
"loss": 0.2585, |
|
"rewards/chosen": -2.1579434871673584, |
|
"rewards/margins": -0.29236698150634766, |
|
"rewards/rejected": -1.8655765056610107, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.360378101489804, |
|
"kl": 0.0, |
|
"learning_rate": 4.644165358451073e-07, |
|
"logps/chosen": -200.14247131347656, |
|
"logps/rejected": -243.30284118652344, |
|
"loss": 0.2047, |
|
"rewards/chosen": -0.22793890535831451, |
|
"rewards/margins": 2.7516727447509766, |
|
"rewards/rejected": -2.97961163520813, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 11.520891149644566, |
|
"kl": 0.0, |
|
"learning_rate": 4.641548927263213e-07, |
|
"logps/chosen": -212.42518615722656, |
|
"logps/rejected": -271.6721496582031, |
|
"loss": 0.1469, |
|
"rewards/chosen": -0.4246146082878113, |
|
"rewards/margins": 3.187201499938965, |
|
"rewards/rejected": -3.611816167831421, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.17959898500212, |
|
"kl": 0.0, |
|
"learning_rate": 4.6389324960753527e-07, |
|
"logps/chosen": -195.15431213378906, |
|
"logps/rejected": -255.0457000732422, |
|
"loss": 0.2136, |
|
"rewards/chosen": -0.6788855791091919, |
|
"rewards/margins": 2.8008108139038086, |
|
"rewards/rejected": -3.47969651222229, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.843997894954787, |
|
"kl": 0.0, |
|
"learning_rate": 4.636316064887493e-07, |
|
"logps/chosen": -201.34103393554688, |
|
"logps/rejected": -312.7411804199219, |
|
"loss": 0.186, |
|
"rewards/chosen": -1.7989752292633057, |
|
"rewards/margins": 2.105161428451538, |
|
"rewards/rejected": -3.9041366577148438, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.401485298711776, |
|
"kl": 0.0, |
|
"learning_rate": 4.6336996336996337e-07, |
|
"logps/chosen": -174.31419372558594, |
|
"logps/rejected": -279.531494140625, |
|
"loss": 0.1899, |
|
"rewards/chosen": -0.8827177286148071, |
|
"rewards/margins": 1.6950773000717163, |
|
"rewards/rejected": -2.5777950286865234, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 12.876518397686489, |
|
"kl": 0.0, |
|
"learning_rate": 4.6310832025117737e-07, |
|
"logps/chosen": -219.05679321289062, |
|
"logps/rejected": -270.2620544433594, |
|
"loss": 0.2003, |
|
"rewards/chosen": -0.21344774961471558, |
|
"rewards/margins": 2.0357961654663086, |
|
"rewards/rejected": -2.249243974685669, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 10.774258331144129, |
|
"kl": 0.0, |
|
"learning_rate": 4.628466771323914e-07, |
|
"logps/chosen": -212.2626495361328, |
|
"logps/rejected": -283.8631286621094, |
|
"loss": 0.1711, |
|
"rewards/chosen": -0.1372440755367279, |
|
"rewards/margins": 2.4757416248321533, |
|
"rewards/rejected": -2.612985610961914, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 8.790512175321405, |
|
"kl": 0.0, |
|
"learning_rate": 4.625850340136054e-07, |
|
"logps/chosen": -232.13316345214844, |
|
"logps/rejected": -223.1201934814453, |
|
"loss": 0.1773, |
|
"rewards/chosen": -0.856933057308197, |
|
"rewards/margins": 2.2891058921813965, |
|
"rewards/rejected": -3.1460390090942383, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.352466882768905, |
|
"kl": 0.0, |
|
"learning_rate": 4.623233908948194e-07, |
|
"logps/chosen": -184.80145263671875, |
|
"logps/rejected": -193.4806671142578, |
|
"loss": 0.215, |
|
"rewards/chosen": -1.3604167699813843, |
|
"rewards/margins": 0.628121018409729, |
|
"rewards/rejected": -1.9885377883911133, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 7.163092288844522, |
|
"kl": 0.0, |
|
"learning_rate": 4.6206174777603346e-07, |
|
"logps/chosen": -202.53546142578125, |
|
"logps/rejected": -254.51487731933594, |
|
"loss": 0.0969, |
|
"rewards/chosen": -0.07566285133361816, |
|
"rewards/margins": 4.109183311462402, |
|
"rewards/rejected": -4.1848464012146, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 12.265660907944843, |
|
"kl": 0.0, |
|
"learning_rate": 4.618001046572475e-07, |
|
"logps/chosen": -265.9979553222656, |
|
"logps/rejected": -339.3370666503906, |
|
"loss": 0.1413, |
|
"rewards/chosen": 0.027865849435329437, |
|
"rewards/margins": 4.2701802253723145, |
|
"rewards/rejected": -4.242314338684082, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 13.038939425208627, |
|
"kl": 0.0, |
|
"learning_rate": 4.6153846153846156e-07, |
|
"logps/chosen": -262.6026306152344, |
|
"logps/rejected": -261.2329406738281, |
|
"loss": 0.1645, |
|
"rewards/chosen": -0.25406259298324585, |
|
"rewards/margins": 3.3194682598114014, |
|
"rewards/rejected": -3.573530912399292, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 13.392666406051779, |
|
"kl": 0.0, |
|
"learning_rate": 4.612768184196755e-07, |
|
"logps/chosen": -265.6265563964844, |
|
"logps/rejected": -281.2951354980469, |
|
"loss": 0.2543, |
|
"rewards/chosen": 0.6286525130271912, |
|
"rewards/margins": 3.1757638454437256, |
|
"rewards/rejected": -2.5471112728118896, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 14.164191990445923, |
|
"kl": 0.0, |
|
"learning_rate": 4.6101517530088956e-07, |
|
"logps/chosen": -280.87567138671875, |
|
"logps/rejected": -298.3075866699219, |
|
"loss": 0.2937, |
|
"rewards/chosen": -1.144802212715149, |
|
"rewards/margins": 0.6895921230316162, |
|
"rewards/rejected": -1.8343943357467651, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.347666390603884, |
|
"kl": 0.0, |
|
"learning_rate": 4.607535321821036e-07, |
|
"logps/chosen": -227.58335876464844, |
|
"logps/rejected": -239.180908203125, |
|
"loss": 0.1543, |
|
"rewards/chosen": -1.3316487073898315, |
|
"rewards/margins": 0.7044416666030884, |
|
"rewards/rejected": -2.03609037399292, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 11.52825621590645, |
|
"kl": 0.0, |
|
"learning_rate": 4.604918890633176e-07, |
|
"logps/chosen": -278.8197326660156, |
|
"logps/rejected": -309.47271728515625, |
|
"loss": 0.2073, |
|
"rewards/chosen": -0.36525189876556396, |
|
"rewards/margins": 3.581759452819824, |
|
"rewards/rejected": -3.9470112323760986, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.00307147974759, |
|
"kl": 0.0, |
|
"learning_rate": 4.6023024594453166e-07, |
|
"logps/chosen": -232.21923828125, |
|
"logps/rejected": -288.6890563964844, |
|
"loss": 0.1359, |
|
"rewards/chosen": -1.7685518264770508, |
|
"rewards/margins": 1.4930996894836426, |
|
"rewards/rejected": -3.2616515159606934, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.048295062554471, |
|
"kl": 0.0, |
|
"learning_rate": 4.5996860282574565e-07, |
|
"logps/chosen": -263.74456787109375, |
|
"logps/rejected": -182.83853149414062, |
|
"loss": 0.1225, |
|
"rewards/chosen": 0.26467177271842957, |
|
"rewards/margins": 3.1499152183532715, |
|
"rewards/rejected": -2.8852434158325195, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 9.505705109305136, |
|
"kl": 0.0, |
|
"learning_rate": 4.5970695970695965e-07, |
|
"logps/chosen": -239.68685913085938, |
|
"logps/rejected": -284.68927001953125, |
|
"loss": 0.228, |
|
"rewards/chosen": -1.0681113004684448, |
|
"rewards/margins": 0.38854992389678955, |
|
"rewards/rejected": -1.4566612243652344, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 12.907956927820472, |
|
"kl": 0.0, |
|
"learning_rate": 4.594453165881737e-07, |
|
"logps/chosen": -171.40005493164062, |
|
"logps/rejected": -231.8187255859375, |
|
"loss": 0.2271, |
|
"rewards/chosen": -0.5379889607429504, |
|
"rewards/margins": 2.1988117694854736, |
|
"rewards/rejected": -2.7368006706237793, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.134785294701812, |
|
"kl": 0.0, |
|
"learning_rate": 4.5918367346938775e-07, |
|
"logps/chosen": -206.3992462158203, |
|
"logps/rejected": -202.78799438476562, |
|
"loss": 0.1833, |
|
"rewards/chosen": -0.1859370917081833, |
|
"rewards/margins": 2.8349618911743164, |
|
"rewards/rejected": -3.0208990573883057, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.654899640372967, |
|
"kl": 0.0, |
|
"learning_rate": 4.589220303506018e-07, |
|
"logps/chosen": -226.39356994628906, |
|
"logps/rejected": -253.27488708496094, |
|
"loss": 0.2065, |
|
"rewards/chosen": -0.936338484287262, |
|
"rewards/margins": 1.9939072132110596, |
|
"rewards/rejected": -2.9302456378936768, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 10.940456012048248, |
|
"kl": 0.0, |
|
"learning_rate": 4.5866038723181575e-07, |
|
"logps/chosen": -224.9565887451172, |
|
"logps/rejected": -265.723388671875, |
|
"loss": 0.2093, |
|
"rewards/chosen": -1.0069631338119507, |
|
"rewards/margins": 1.279760718345642, |
|
"rewards/rejected": -2.2867238521575928, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 13.331073300797962, |
|
"kl": 0.0, |
|
"learning_rate": 4.583987441130298e-07, |
|
"logps/chosen": -199.77915954589844, |
|
"logps/rejected": -303.58538818359375, |
|
"loss": 0.1757, |
|
"rewards/chosen": -0.519672155380249, |
|
"rewards/margins": 1.5667927265167236, |
|
"rewards/rejected": -2.0864648818969727, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 12.759132153782163, |
|
"kl": 0.0, |
|
"learning_rate": 4.5813710099424385e-07, |
|
"logps/chosen": -263.2257385253906, |
|
"logps/rejected": -216.82395935058594, |
|
"loss": 0.2497, |
|
"rewards/chosen": -0.19666321575641632, |
|
"rewards/margins": 1.9927500486373901, |
|
"rewards/rejected": -2.18941330909729, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 13.295115337094142, |
|
"kl": 0.0, |
|
"learning_rate": 4.5787545787545784e-07, |
|
"logps/chosen": -228.7009735107422, |
|
"logps/rejected": -295.6015319824219, |
|
"loss": 0.2364, |
|
"rewards/chosen": -0.8198361992835999, |
|
"rewards/margins": 0.8233547806739807, |
|
"rewards/rejected": -1.6431909799575806, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 14.82680643745037, |
|
"kl": 0.0, |
|
"learning_rate": 4.576138147566719e-07, |
|
"logps/chosen": -260.577880859375, |
|
"logps/rejected": -253.9260711669922, |
|
"loss": 0.2625, |
|
"rewards/chosen": -0.1303100883960724, |
|
"rewards/margins": 2.1792666912078857, |
|
"rewards/rejected": -2.3095767498016357, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 13.208958389079216, |
|
"kl": 0.0, |
|
"learning_rate": 4.5735217163788594e-07, |
|
"logps/chosen": -190.9998016357422, |
|
"logps/rejected": -276.3247985839844, |
|
"loss": 0.1975, |
|
"rewards/chosen": 0.03442925959825516, |
|
"rewards/margins": 2.56953763961792, |
|
"rewards/rejected": -2.5351083278656006, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 11.80477720551951, |
|
"kl": 0.0, |
|
"learning_rate": 4.570905285190999e-07, |
|
"logps/chosen": -246.15234375, |
|
"logps/rejected": -248.68072509765625, |
|
"loss": 0.2212, |
|
"rewards/chosen": -0.5984467267990112, |
|
"rewards/margins": 1.7412387132644653, |
|
"rewards/rejected": -2.3396854400634766, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.00263121063917, |
|
"kl": 0.0, |
|
"learning_rate": 4.5682888540031394e-07, |
|
"logps/chosen": -252.92127990722656, |
|
"logps/rejected": -215.0977325439453, |
|
"loss": 0.1732, |
|
"rewards/chosen": -0.49442756175994873, |
|
"rewards/margins": 2.391329288482666, |
|
"rewards/rejected": -2.885756731033325, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.7104117892985, |
|
"kl": 0.0, |
|
"learning_rate": 4.56567242281528e-07, |
|
"logps/chosen": -286.4942626953125, |
|
"logps/rejected": -277.3382873535156, |
|
"loss": 0.1979, |
|
"rewards/chosen": -0.5238850712776184, |
|
"rewards/margins": 2.418689489364624, |
|
"rewards/rejected": -2.9425745010375977, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.591639733233848, |
|
"kl": 0.0, |
|
"learning_rate": 4.56305599162742e-07, |
|
"logps/chosen": -237.4594268798828, |
|
"logps/rejected": -217.5932159423828, |
|
"loss": 0.1465, |
|
"rewards/chosen": -0.8079670071601868, |
|
"rewards/margins": 1.809727668762207, |
|
"rewards/rejected": -2.617694616317749, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 11.459074832211314, |
|
"kl": 0.0, |
|
"learning_rate": 4.5604395604395604e-07, |
|
"logps/chosen": -220.34298706054688, |
|
"logps/rejected": -245.24441528320312, |
|
"loss": 0.17, |
|
"rewards/chosen": 0.019128350540995598, |
|
"rewards/margins": 2.7078402042388916, |
|
"rewards/rejected": -2.6887118816375732, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 15.063963939999352, |
|
"kl": 0.0, |
|
"learning_rate": 4.5578231292517003e-07, |
|
"logps/chosen": -297.51300048828125, |
|
"logps/rejected": -251.10533142089844, |
|
"loss": 0.1753, |
|
"rewards/chosen": 0.8247194886207581, |
|
"rewards/margins": 2.683767557144165, |
|
"rewards/rejected": -1.8590481281280518, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.142681234947581, |
|
"kl": 0.0, |
|
"learning_rate": 4.555206698063841e-07, |
|
"logps/chosen": -300.2449951171875, |
|
"logps/rejected": -224.29713439941406, |
|
"loss": 0.2087, |
|
"rewards/chosen": -0.8797904253005981, |
|
"rewards/margins": 1.0547667741775513, |
|
"rewards/rejected": -1.9345571994781494, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.227907117502706, |
|
"kl": 0.0, |
|
"learning_rate": 4.552590266875981e-07, |
|
"logps/chosen": -224.4203338623047, |
|
"logps/rejected": -315.0204162597656, |
|
"loss": 0.1784, |
|
"rewards/chosen": -0.9637618064880371, |
|
"rewards/margins": 2.0699081420898438, |
|
"rewards/rejected": -3.033669948577881, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 11.184644215978503, |
|
"kl": 0.0, |
|
"learning_rate": 4.5499738356881213e-07, |
|
"logps/chosen": -232.22695922851562, |
|
"logps/rejected": -202.02688598632812, |
|
"loss": 0.2471, |
|
"rewards/chosen": -1.0796171426773071, |
|
"rewards/margins": 1.041359305381775, |
|
"rewards/rejected": -2.120976448059082, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 12.858302396434425, |
|
"kl": 0.0, |
|
"learning_rate": 4.547357404500262e-07, |
|
"logps/chosen": -231.92547607421875, |
|
"logps/rejected": -284.2903137207031, |
|
"loss": 0.1662, |
|
"rewards/chosen": 0.22813068330287933, |
|
"rewards/margins": 2.726292610168457, |
|
"rewards/rejected": -2.498162031173706, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.830471355823923, |
|
"kl": 0.0, |
|
"learning_rate": 4.5447409733124013e-07, |
|
"logps/chosen": -245.82032775878906, |
|
"logps/rejected": -204.56790161132812, |
|
"loss": 0.2141, |
|
"rewards/chosen": 0.45919883251190186, |
|
"rewards/margins": 2.118945598602295, |
|
"rewards/rejected": -1.6597468852996826, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 11.540820927447694, |
|
"kl": 0.0, |
|
"learning_rate": 4.542124542124542e-07, |
|
"logps/chosen": -221.0320587158203, |
|
"logps/rejected": -244.3851318359375, |
|
"loss": 0.1404, |
|
"rewards/chosen": 0.015136350877583027, |
|
"rewards/margins": 2.4611456394195557, |
|
"rewards/rejected": -2.446009397506714, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.370935336042336, |
|
"kl": 0.0, |
|
"learning_rate": 4.5395081109366823e-07, |
|
"logps/chosen": -148.11834716796875, |
|
"logps/rejected": -254.36962890625, |
|
"loss": 0.1237, |
|
"rewards/chosen": 0.47263994812965393, |
|
"rewards/margins": 3.6257412433624268, |
|
"rewards/rejected": -3.1531012058258057, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.524027879440371, |
|
"kl": 0.0, |
|
"learning_rate": 4.536891679748822e-07, |
|
"logps/chosen": -218.85006713867188, |
|
"logps/rejected": -268.6586608886719, |
|
"loss": 0.1249, |
|
"rewards/chosen": -1.152251958847046, |
|
"rewards/margins": 1.3508758544921875, |
|
"rewards/rejected": -2.5031278133392334, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.097839005642474, |
|
"kl": 0.0, |
|
"learning_rate": 4.534275248560963e-07, |
|
"logps/chosen": -140.55996704101562, |
|
"logps/rejected": -265.65789794921875, |
|
"loss": 0.279, |
|
"rewards/chosen": -0.11726745963096619, |
|
"rewards/margins": 1.562013030052185, |
|
"rewards/rejected": -1.6792805194854736, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 11.353665853013293, |
|
"kl": 0.0, |
|
"learning_rate": 4.5316588173731027e-07, |
|
"logps/chosen": -168.6680145263672, |
|
"logps/rejected": -309.8199462890625, |
|
"loss": 0.1995, |
|
"rewards/chosen": -0.32298406958580017, |
|
"rewards/margins": 1.8489097356796265, |
|
"rewards/rejected": -2.171893835067749, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.693119887330608, |
|
"kl": 0.0, |
|
"learning_rate": 4.529042386185243e-07, |
|
"logps/chosen": -262.695556640625, |
|
"logps/rejected": -234.45143127441406, |
|
"loss": 0.2569, |
|
"rewards/chosen": -0.22278353571891785, |
|
"rewards/margins": 2.558516502380371, |
|
"rewards/rejected": -2.7813000679016113, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 13.107582728245008, |
|
"kl": 0.0, |
|
"learning_rate": 4.526425954997383e-07, |
|
"logps/chosen": -244.29647827148438, |
|
"logps/rejected": -291.70941162109375, |
|
"loss": 0.2651, |
|
"rewards/chosen": -0.5042144656181335, |
|
"rewards/margins": 2.197082757949829, |
|
"rewards/rejected": -2.7012972831726074, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.773171918884088, |
|
"kl": 0.0, |
|
"learning_rate": 4.5238095238095237e-07, |
|
"logps/chosen": -197.32012939453125, |
|
"logps/rejected": -293.9897766113281, |
|
"loss": 0.1873, |
|
"rewards/chosen": -0.5700012445449829, |
|
"rewards/margins": 4.046950340270996, |
|
"rewards/rejected": -4.6169514656066895, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.20040922510821, |
|
"kl": 0.0, |
|
"learning_rate": 4.521193092621664e-07, |
|
"logps/chosen": -194.4012908935547, |
|
"logps/rejected": -223.0313720703125, |
|
"loss": 0.1483, |
|
"rewards/chosen": 0.47125691175460815, |
|
"rewards/margins": 2.2409911155700684, |
|
"rewards/rejected": -1.7697341442108154, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 12.989173617269795, |
|
"kl": 0.0, |
|
"learning_rate": 4.5185766614338037e-07, |
|
"logps/chosen": -171.93673706054688, |
|
"logps/rejected": -316.18316650390625, |
|
"loss": 0.216, |
|
"rewards/chosen": -0.1644737273454666, |
|
"rewards/margins": 0.4819384217262268, |
|
"rewards/rejected": -0.6464121341705322, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 13.329019681893538, |
|
"kl": 0.0, |
|
"learning_rate": 4.515960230245944e-07, |
|
"logps/chosen": -215.2859649658203, |
|
"logps/rejected": -258.4815368652344, |
|
"loss": 0.1873, |
|
"rewards/chosen": 0.19089780747890472, |
|
"rewards/margins": 3.2682807445526123, |
|
"rewards/rejected": -3.077383041381836, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.903441953636646, |
|
"kl": 0.0, |
|
"learning_rate": 4.5133437990580847e-07, |
|
"logps/chosen": -203.1886444091797, |
|
"logps/rejected": -219.4387969970703, |
|
"loss": 0.1725, |
|
"rewards/chosen": 0.7246238589286804, |
|
"rewards/margins": 1.6935207843780518, |
|
"rewards/rejected": -0.9688969254493713, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 10.026171474980584, |
|
"kl": 0.0, |
|
"learning_rate": 4.5107273678702246e-07, |
|
"logps/chosen": -237.39012145996094, |
|
"logps/rejected": -313.46038818359375, |
|
"loss": 0.1848, |
|
"rewards/chosen": 0.13696634769439697, |
|
"rewards/margins": 2.4037017822265625, |
|
"rewards/rejected": -2.266735315322876, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 13.743945884823951, |
|
"kl": 0.0, |
|
"learning_rate": 4.508110936682365e-07, |
|
"logps/chosen": -226.96961975097656, |
|
"logps/rejected": -239.95713806152344, |
|
"loss": 0.0994, |
|
"rewards/chosen": 0.7345108389854431, |
|
"rewards/margins": 2.190957546234131, |
|
"rewards/rejected": -1.4564467668533325, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.111688156500954, |
|
"kl": 0.0, |
|
"learning_rate": 4.5054945054945056e-07, |
|
"logps/chosen": -228.42758178710938, |
|
"logps/rejected": -203.33209228515625, |
|
"loss": 0.1395, |
|
"rewards/chosen": 0.4875214099884033, |
|
"rewards/margins": 1.7979084253311157, |
|
"rewards/rejected": -1.3103870153427124, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.470046480370337, |
|
"kl": 0.0, |
|
"learning_rate": 4.5028780743066456e-07, |
|
"logps/chosen": -207.3546905517578, |
|
"logps/rejected": -231.11886596679688, |
|
"loss": 0.1262, |
|
"rewards/chosen": -0.461222380399704, |
|
"rewards/margins": 1.9130089282989502, |
|
"rewards/rejected": -2.3742313385009766, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 12.783425591778233, |
|
"kl": 0.0, |
|
"learning_rate": 4.5002616431187856e-07, |
|
"logps/chosen": -212.4601593017578, |
|
"logps/rejected": -252.49920654296875, |
|
"loss": 0.1552, |
|
"rewards/chosen": 0.14723794162273407, |
|
"rewards/margins": 1.8059498071670532, |
|
"rewards/rejected": -1.6587119102478027, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.876829640439261, |
|
"kl": 0.0, |
|
"learning_rate": 4.497645211930926e-07, |
|
"logps/chosen": -331.9737548828125, |
|
"logps/rejected": -216.70932006835938, |
|
"loss": 0.1592, |
|
"rewards/chosen": 0.7978419065475464, |
|
"rewards/margins": 3.4641337394714355, |
|
"rewards/rejected": -2.6662919521331787, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.18786423314498, |
|
"kl": 0.0, |
|
"learning_rate": 4.4950287807430666e-07, |
|
"logps/chosen": -210.8509521484375, |
|
"logps/rejected": -276.6178283691406, |
|
"loss": 0.1855, |
|
"rewards/chosen": 0.7132515907287598, |
|
"rewards/margins": 3.511439561843872, |
|
"rewards/rejected": -2.7981879711151123, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.51339539474961, |
|
"kl": 0.0, |
|
"learning_rate": 4.4924123495552066e-07, |
|
"logps/chosen": -193.66836547851562, |
|
"logps/rejected": -206.64353942871094, |
|
"loss": 0.2209, |
|
"rewards/chosen": -0.7599809765815735, |
|
"rewards/margins": 0.2910911440849304, |
|
"rewards/rejected": -1.051072120666504, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.775297052636276, |
|
"kl": 0.0, |
|
"learning_rate": 4.4897959183673465e-07, |
|
"logps/chosen": -210.68817138671875, |
|
"logps/rejected": -269.3539123535156, |
|
"loss": 0.1624, |
|
"rewards/chosen": 0.5124328136444092, |
|
"rewards/margins": 3.528744697570801, |
|
"rewards/rejected": -3.0163118839263916, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 13.388916890187613, |
|
"kl": 0.0, |
|
"learning_rate": 4.487179487179487e-07, |
|
"logps/chosen": -286.09918212890625, |
|
"logps/rejected": -238.72901916503906, |
|
"loss": 0.2343, |
|
"rewards/chosen": -0.26804402470588684, |
|
"rewards/margins": 1.2514140605926514, |
|
"rewards/rejected": -1.5194580554962158, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.098868865803937, |
|
"kl": 0.0, |
|
"learning_rate": 4.484563055991627e-07, |
|
"logps/chosen": -247.14202880859375, |
|
"logps/rejected": -196.82537841796875, |
|
"loss": 0.157, |
|
"rewards/chosen": -0.33019348978996277, |
|
"rewards/margins": 2.0103812217712402, |
|
"rewards/rejected": -2.3405747413635254, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 14.223147581105783, |
|
"kl": 0.0, |
|
"learning_rate": 4.4819466248037675e-07, |
|
"logps/chosen": -204.22280883789062, |
|
"logps/rejected": -286.9610900878906, |
|
"loss": 0.237, |
|
"rewards/chosen": -0.013581500388681889, |
|
"rewards/margins": 1.4753634929656982, |
|
"rewards/rejected": -1.4889450073242188, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 14.41947331635678, |
|
"kl": 0.0, |
|
"learning_rate": 4.479330193615908e-07, |
|
"logps/chosen": -213.51507568359375, |
|
"logps/rejected": -270.748046875, |
|
"loss": 0.1949, |
|
"rewards/chosen": -0.7232166528701782, |
|
"rewards/margins": 2.0983409881591797, |
|
"rewards/rejected": -2.8215577602386475, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 9.05336870531292, |
|
"kl": 0.0, |
|
"learning_rate": 4.476713762428048e-07, |
|
"logps/chosen": -266.99261474609375, |
|
"logps/rejected": -215.208740234375, |
|
"loss": 0.164, |
|
"rewards/chosen": 0.1612011045217514, |
|
"rewards/margins": 1.967577338218689, |
|
"rewards/rejected": -1.8063762187957764, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 12.145917811167815, |
|
"kl": 0.0, |
|
"learning_rate": 4.474097331240188e-07, |
|
"logps/chosen": -311.310546875, |
|
"logps/rejected": -207.57504272460938, |
|
"loss": 0.198, |
|
"rewards/chosen": 0.33403000235557556, |
|
"rewards/margins": 2.6162540912628174, |
|
"rewards/rejected": -2.282224178314209, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.620881317672168, |
|
"kl": 0.0, |
|
"learning_rate": 4.4714809000523285e-07, |
|
"logps/chosen": -250.87953186035156, |
|
"logps/rejected": -275.86480712890625, |
|
"loss": 0.1359, |
|
"rewards/chosen": 0.3162269592285156, |
|
"rewards/margins": 1.990512728691101, |
|
"rewards/rejected": -1.6742857694625854, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.496041019849555, |
|
"kl": 0.0, |
|
"learning_rate": 4.468864468864469e-07, |
|
"logps/chosen": -247.9547119140625, |
|
"logps/rejected": -217.61874389648438, |
|
"loss": 0.203, |
|
"rewards/chosen": -1.653024673461914, |
|
"rewards/margins": 0.7888014316558838, |
|
"rewards/rejected": -2.441826105117798, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.99618040191761, |
|
"kl": 0.0, |
|
"learning_rate": 4.466248037676609e-07, |
|
"logps/chosen": -150.46995544433594, |
|
"logps/rejected": -319.3192138671875, |
|
"loss": 0.2054, |
|
"rewards/chosen": -0.28798094391822815, |
|
"rewards/margins": 1.984279751777649, |
|
"rewards/rejected": -2.2722606658935547, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.06635292149166, |
|
"kl": 0.0, |
|
"learning_rate": 4.463631606488749e-07, |
|
"logps/chosen": -206.5174560546875, |
|
"logps/rejected": -254.26016235351562, |
|
"loss": 0.1693, |
|
"rewards/chosen": -0.9574307799339294, |
|
"rewards/margins": 2.373122453689575, |
|
"rewards/rejected": -3.3305532932281494, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 9.1313149728796, |
|
"kl": 0.0, |
|
"learning_rate": 4.4610151753008894e-07, |
|
"logps/chosen": -211.5845489501953, |
|
"logps/rejected": -266.97222900390625, |
|
"loss": 0.2363, |
|
"rewards/chosen": -0.3957107961177826, |
|
"rewards/margins": 3.1181278228759766, |
|
"rewards/rejected": -3.513838529586792, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 13.851054073575309, |
|
"kl": 0.0, |
|
"learning_rate": 4.4583987441130294e-07, |
|
"logps/chosen": -266.7466125488281, |
|
"logps/rejected": -223.49757385253906, |
|
"loss": 0.2842, |
|
"rewards/chosen": -1.3039528131484985, |
|
"rewards/margins": 0.7924333810806274, |
|
"rewards/rejected": -2.096386194229126, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.650743591966863, |
|
"kl": 0.0, |
|
"learning_rate": 4.45578231292517e-07, |
|
"logps/chosen": -183.24017333984375, |
|
"logps/rejected": -276.4227294921875, |
|
"loss": 0.1615, |
|
"rewards/chosen": -0.3646261990070343, |
|
"rewards/margins": 2.279031276702881, |
|
"rewards/rejected": -2.6436574459075928, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 13.362859226103993, |
|
"kl": 0.0, |
|
"learning_rate": 4.4531658817373104e-07, |
|
"logps/chosen": -205.38035583496094, |
|
"logps/rejected": -269.58526611328125, |
|
"loss": 0.2259, |
|
"rewards/chosen": -0.2013118416070938, |
|
"rewards/margins": 2.4193832874298096, |
|
"rewards/rejected": -2.620695114135742, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 12.32754631235818, |
|
"kl": 0.0, |
|
"learning_rate": 4.45054945054945e-07, |
|
"logps/chosen": -268.922607421875, |
|
"logps/rejected": -284.7842102050781, |
|
"loss": 0.2121, |
|
"rewards/chosen": -1.0982351303100586, |
|
"rewards/margins": 1.700211524963379, |
|
"rewards/rejected": -2.7984466552734375, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.666014991292931, |
|
"kl": 0.0, |
|
"learning_rate": 4.4479330193615904e-07, |
|
"logps/chosen": -273.0299987792969, |
|
"logps/rejected": -326.8552551269531, |
|
"loss": 0.139, |
|
"rewards/chosen": -0.8367617726325989, |
|
"rewards/margins": 2.7738115787506104, |
|
"rewards/rejected": -3.6105732917785645, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 12.85199720190171, |
|
"kl": 0.0, |
|
"learning_rate": 4.445316588173731e-07, |
|
"logps/chosen": -267.0897216796875, |
|
"logps/rejected": -248.22039794921875, |
|
"loss": 0.2089, |
|
"rewards/chosen": -1.914868950843811, |
|
"rewards/margins": 1.703445553779602, |
|
"rewards/rejected": -3.618314504623413, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 13.279155862550748, |
|
"kl": 0.0, |
|
"learning_rate": 4.4427001569858714e-07, |
|
"logps/chosen": -270.5106201171875, |
|
"logps/rejected": -323.9206848144531, |
|
"loss": 0.1508, |
|
"rewards/chosen": 0.43147632479667664, |
|
"rewards/margins": 4.1532816886901855, |
|
"rewards/rejected": -3.7218053340911865, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.842517342661287, |
|
"kl": 0.0, |
|
"learning_rate": 4.4400837257980113e-07, |
|
"logps/chosen": -251.76222229003906, |
|
"logps/rejected": -204.5667724609375, |
|
"loss": 0.2109, |
|
"rewards/chosen": -0.23020866513252258, |
|
"rewards/margins": 1.9914796352386475, |
|
"rewards/rejected": -2.2216882705688477, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.17043596208898, |
|
"kl": 0.0, |
|
"learning_rate": 4.4374672946101513e-07, |
|
"logps/chosen": -203.4984588623047, |
|
"logps/rejected": -343.187255859375, |
|
"loss": 0.2139, |
|
"rewards/chosen": -0.3769696354866028, |
|
"rewards/margins": 2.5140483379364014, |
|
"rewards/rejected": -2.8910179138183594, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.293416573180265, |
|
"kl": 0.0, |
|
"learning_rate": 4.434850863422292e-07, |
|
"logps/chosen": -294.1488037109375, |
|
"logps/rejected": -282.24981689453125, |
|
"loss": 0.1393, |
|
"rewards/chosen": 0.7586994767189026, |
|
"rewards/margins": 4.094053268432617, |
|
"rewards/rejected": -3.3353536128997803, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 13.608688968178855, |
|
"kl": 0.0, |
|
"learning_rate": 4.432234432234432e-07, |
|
"logps/chosen": -274.1988220214844, |
|
"logps/rejected": -293.933837890625, |
|
"loss": 0.1346, |
|
"rewards/chosen": -0.13219358026981354, |
|
"rewards/margins": 2.8667314052581787, |
|
"rewards/rejected": -2.998924970626831, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.83390830521355, |
|
"kl": 0.0, |
|
"learning_rate": 4.4296180010465723e-07, |
|
"logps/chosen": -222.037353515625, |
|
"logps/rejected": -319.1642150878906, |
|
"loss": 0.1958, |
|
"rewards/chosen": -0.27647799253463745, |
|
"rewards/margins": 2.2584426403045654, |
|
"rewards/rejected": -2.5349206924438477, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.966693070507677, |
|
"kl": 0.0, |
|
"learning_rate": 4.427001569858713e-07, |
|
"logps/chosen": -191.46580505371094, |
|
"logps/rejected": -366.8327941894531, |
|
"loss": 0.1951, |
|
"rewards/chosen": -0.4248928129673004, |
|
"rewards/margins": 2.9257259368896484, |
|
"rewards/rejected": -3.350618839263916, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.986050551901583, |
|
"kl": 0.0, |
|
"learning_rate": 4.424385138670852e-07, |
|
"logps/chosen": -235.12484741210938, |
|
"logps/rejected": -248.2144012451172, |
|
"loss": 0.2251, |
|
"rewards/chosen": -1.9966895580291748, |
|
"rewards/margins": 1.9721267223358154, |
|
"rewards/rejected": -3.9688162803649902, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.554269588738498, |
|
"kl": 0.0, |
|
"learning_rate": 4.421768707482993e-07, |
|
"logps/chosen": -239.76820373535156, |
|
"logps/rejected": -218.79449462890625, |
|
"loss": 0.2125, |
|
"rewards/chosen": -1.0601166486740112, |
|
"rewards/margins": 2.6879591941833496, |
|
"rewards/rejected": -3.7480757236480713, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.807560445400602, |
|
"kl": 0.0, |
|
"learning_rate": 4.419152276295133e-07, |
|
"logps/chosen": -226.56256103515625, |
|
"logps/rejected": -218.861328125, |
|
"loss": 0.1385, |
|
"rewards/chosen": -0.5115461349487305, |
|
"rewards/margins": 2.6464967727661133, |
|
"rewards/rejected": -3.1580429077148438, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.670267600602166, |
|
"kl": 0.0, |
|
"learning_rate": 4.416535845107274e-07, |
|
"logps/chosen": -282.7430725097656, |
|
"logps/rejected": -211.0267791748047, |
|
"loss": 0.2221, |
|
"rewards/chosen": -1.8116750717163086, |
|
"rewards/margins": 0.58609938621521, |
|
"rewards/rejected": -2.3977744579315186, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.238377690831841, |
|
"kl": 0.0, |
|
"learning_rate": 4.4139194139194137e-07, |
|
"logps/chosen": -218.9561004638672, |
|
"logps/rejected": -232.92832946777344, |
|
"loss": 0.1533, |
|
"rewards/chosen": -0.23154856264591217, |
|
"rewards/margins": 1.915379285812378, |
|
"rewards/rejected": -2.146927833557129, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 13.477552695461938, |
|
"kl": 0.0, |
|
"learning_rate": 4.411302982731554e-07, |
|
"logps/chosen": -241.8193359375, |
|
"logps/rejected": -243.86459350585938, |
|
"loss": 0.2513, |
|
"rewards/chosen": -0.6799749135971069, |
|
"rewards/margins": 0.9313141107559204, |
|
"rewards/rejected": -1.6112890243530273, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.248025744939582, |
|
"kl": 0.0, |
|
"learning_rate": 4.408686551543694e-07, |
|
"logps/chosen": -255.42225646972656, |
|
"logps/rejected": -354.8009948730469, |
|
"loss": 0.1809, |
|
"rewards/chosen": -0.15073581039905548, |
|
"rewards/margins": 3.6917855739593506, |
|
"rewards/rejected": -3.8425214290618896, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 15.177673567635171, |
|
"kl": 0.0, |
|
"learning_rate": 4.406070120355834e-07, |
|
"logps/chosen": -219.69955444335938, |
|
"logps/rejected": -257.246337890625, |
|
"loss": 0.2311, |
|
"rewards/chosen": -0.5061897039413452, |
|
"rewards/margins": 1.860742449760437, |
|
"rewards/rejected": -2.3669321537017822, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.212253558418395, |
|
"kl": 0.0, |
|
"learning_rate": 4.4034536891679747e-07, |
|
"logps/chosen": -216.00762939453125, |
|
"logps/rejected": -271.8702392578125, |
|
"loss": 0.1099, |
|
"rewards/chosen": -0.29265689849853516, |
|
"rewards/margins": 2.9313840866088867, |
|
"rewards/rejected": -3.224040985107422, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.734827553729616, |
|
"kl": 0.0, |
|
"learning_rate": 4.400837257980115e-07, |
|
"logps/chosen": -217.1133270263672, |
|
"logps/rejected": -310.7548522949219, |
|
"loss": 0.2233, |
|
"rewards/chosen": -0.2567431330680847, |
|
"rewards/margins": 2.2228996753692627, |
|
"rewards/rejected": -2.479642868041992, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 9.41912765209931, |
|
"kl": 0.0, |
|
"learning_rate": 4.398220826792255e-07, |
|
"logps/chosen": -194.0414581298828, |
|
"logps/rejected": -252.26522827148438, |
|
"loss": 0.1112, |
|
"rewards/chosen": 0.5574043989181519, |
|
"rewards/margins": 4.030994892120361, |
|
"rewards/rejected": -3.47359037399292, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.488015000282592, |
|
"kl": 0.0, |
|
"learning_rate": 4.395604395604395e-07, |
|
"logps/chosen": -188.4119873046875, |
|
"logps/rejected": -282.2178039550781, |
|
"loss": 0.1811, |
|
"rewards/chosen": -0.3539419174194336, |
|
"rewards/margins": 1.9429447650909424, |
|
"rewards/rejected": -2.296886682510376, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.39486238771726, |
|
"kl": 0.0, |
|
"learning_rate": 4.3929879644165356e-07, |
|
"logps/chosen": -223.63308715820312, |
|
"logps/rejected": -240.90345764160156, |
|
"loss": 0.1975, |
|
"rewards/chosen": -0.5462809205055237, |
|
"rewards/margins": 1.494751214981079, |
|
"rewards/rejected": -2.041032075881958, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.215577164704541, |
|
"kl": 0.0, |
|
"learning_rate": 4.390371533228676e-07, |
|
"logps/chosen": -208.15684509277344, |
|
"logps/rejected": -315.0474548339844, |
|
"loss": 0.2079, |
|
"rewards/chosen": -1.009997010231018, |
|
"rewards/margins": 1.5491665601730347, |
|
"rewards/rejected": -2.5591635704040527, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.766769653642761, |
|
"kl": 0.0, |
|
"learning_rate": 4.387755102040816e-07, |
|
"logps/chosen": -183.52944946289062, |
|
"logps/rejected": -254.58596801757812, |
|
"loss": 0.1539, |
|
"rewards/chosen": -0.7764175534248352, |
|
"rewards/margins": 2.6363468170166016, |
|
"rewards/rejected": -3.412764310836792, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 11.608047188492522, |
|
"kl": 0.0, |
|
"learning_rate": 4.3851386708529566e-07, |
|
"logps/chosen": -210.39288330078125, |
|
"logps/rejected": -248.83990478515625, |
|
"loss": 0.1721, |
|
"rewards/chosen": -0.28012871742248535, |
|
"rewards/margins": 2.2773919105529785, |
|
"rewards/rejected": -2.557520627975464, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 13.662061510468588, |
|
"kl": 0.0, |
|
"learning_rate": 4.3825222396650966e-07, |
|
"logps/chosen": -257.81976318359375, |
|
"logps/rejected": -294.05328369140625, |
|
"loss": 0.1683, |
|
"rewards/chosen": 0.09654220193624496, |
|
"rewards/margins": 2.7226510047912598, |
|
"rewards/rejected": -2.6261088848114014, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 12.161426046134066, |
|
"kl": 0.0, |
|
"learning_rate": 4.3799058084772366e-07, |
|
"logps/chosen": -248.7794647216797, |
|
"logps/rejected": -260.9866943359375, |
|
"loss": 0.1904, |
|
"rewards/chosen": 0.21598704159259796, |
|
"rewards/margins": 3.1339871883392334, |
|
"rewards/rejected": -2.9180002212524414, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.031617578976944, |
|
"kl": 0.0, |
|
"learning_rate": 4.377289377289377e-07, |
|
"logps/chosen": -239.06961059570312, |
|
"logps/rejected": -262.04071044921875, |
|
"loss": 0.1708, |
|
"rewards/chosen": 0.007866961881518364, |
|
"rewards/margins": 2.7547590732574463, |
|
"rewards/rejected": -2.746892213821411, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 14.122345962730769, |
|
"kl": 0.0, |
|
"learning_rate": 4.3746729461015176e-07, |
|
"logps/chosen": -192.8285369873047, |
|
"logps/rejected": -284.4400634765625, |
|
"loss": 0.2025, |
|
"rewards/chosen": -0.6923890709877014, |
|
"rewards/margins": 1.7750904560089111, |
|
"rewards/rejected": -2.4674794673919678, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.12440657869901, |
|
"kl": 0.0, |
|
"learning_rate": 4.3720565149136575e-07, |
|
"logps/chosen": -254.7532958984375, |
|
"logps/rejected": -278.2186279296875, |
|
"loss": 0.2537, |
|
"rewards/chosen": -1.5660591125488281, |
|
"rewards/margins": 1.244922161102295, |
|
"rewards/rejected": -2.810981273651123, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.097833834404545, |
|
"kl": 0.0, |
|
"learning_rate": 4.3694400837257975e-07, |
|
"logps/chosen": -190.60516357421875, |
|
"logps/rejected": -281.48699951171875, |
|
"loss": 0.1973, |
|
"rewards/chosen": -0.27278468012809753, |
|
"rewards/margins": 2.6037862300872803, |
|
"rewards/rejected": -2.8765709400177, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.170278536494735, |
|
"kl": 0.0, |
|
"learning_rate": 4.366823652537938e-07, |
|
"logps/chosen": -241.40150451660156, |
|
"logps/rejected": -271.16192626953125, |
|
"loss": 0.1001, |
|
"rewards/chosen": 0.05600461736321449, |
|
"rewards/margins": 4.060368537902832, |
|
"rewards/rejected": -4.004364013671875, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.968657094423396, |
|
"kl": 0.0, |
|
"learning_rate": 4.3642072213500785e-07, |
|
"logps/chosen": -218.7855987548828, |
|
"logps/rejected": -231.13519287109375, |
|
"loss": 0.1814, |
|
"rewards/chosen": 0.2664739489555359, |
|
"rewards/margins": 1.7813398838043213, |
|
"rewards/rejected": -1.5148659944534302, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 8.862306842445157, |
|
"kl": 0.0, |
|
"learning_rate": 4.3615907901622185e-07, |
|
"logps/chosen": -195.7134552001953, |
|
"logps/rejected": -222.33502197265625, |
|
"loss": 0.096, |
|
"rewards/chosen": 0.3085781931877136, |
|
"rewards/margins": 2.4294745922088623, |
|
"rewards/rejected": -2.120896339416504, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.488199983055214, |
|
"kl": 0.0, |
|
"learning_rate": 4.358974358974359e-07, |
|
"logps/chosen": -213.25262451171875, |
|
"logps/rejected": -245.7234344482422, |
|
"loss": 0.1646, |
|
"rewards/chosen": 0.47856229543685913, |
|
"rewards/margins": 4.145866870880127, |
|
"rewards/rejected": -3.667304754257202, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 11.576912231416909, |
|
"kl": 0.0, |
|
"learning_rate": 4.3563579277864995e-07, |
|
"logps/chosen": -215.87538146972656, |
|
"logps/rejected": -262.62786865234375, |
|
"loss": 0.2232, |
|
"rewards/chosen": 0.43594250082969666, |
|
"rewards/margins": 1.7815001010894775, |
|
"rewards/rejected": -1.3455575704574585, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 11.472489865502666, |
|
"kl": 0.0, |
|
"learning_rate": 4.353741496598639e-07, |
|
"logps/chosen": -298.4697570800781, |
|
"logps/rejected": -223.03550720214844, |
|
"loss": 0.1638, |
|
"rewards/chosen": 0.32377567887306213, |
|
"rewards/margins": 1.912637710571289, |
|
"rewards/rejected": -1.5888620615005493, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.245318090630139, |
|
"kl": 0.0, |
|
"learning_rate": 4.3511250654107794e-07, |
|
"logps/chosen": -160.44625854492188, |
|
"logps/rejected": -268.469482421875, |
|
"loss": 0.1647, |
|
"rewards/chosen": 0.2935628294944763, |
|
"rewards/margins": 4.304611682891846, |
|
"rewards/rejected": -4.011048793792725, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.203079637794323, |
|
"kl": 0.0, |
|
"learning_rate": 4.34850863422292e-07, |
|
"logps/chosen": -248.113037109375, |
|
"logps/rejected": -241.7339630126953, |
|
"loss": 0.1833, |
|
"rewards/chosen": 1.2330036163330078, |
|
"rewards/margins": 3.8498387336730957, |
|
"rewards/rejected": -2.616835117340088, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.144257332584273, |
|
"kl": 0.0, |
|
"learning_rate": 4.34589220303506e-07, |
|
"logps/chosen": -196.73558044433594, |
|
"logps/rejected": -234.81517028808594, |
|
"loss": 0.19, |
|
"rewards/chosen": -0.7745031714439392, |
|
"rewards/margins": 1.9112095832824707, |
|
"rewards/rejected": -2.6857128143310547, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.977345554295628, |
|
"kl": 0.0, |
|
"learning_rate": 4.3432757718472004e-07, |
|
"logps/chosen": -270.6989440917969, |
|
"logps/rejected": -296.6756896972656, |
|
"loss": 0.1977, |
|
"rewards/chosen": 0.5559626221656799, |
|
"rewards/margins": 2.0558934211730957, |
|
"rewards/rejected": -1.499930739402771, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 11.195892622420404, |
|
"kl": 0.0, |
|
"learning_rate": 4.3406593406593404e-07, |
|
"logps/chosen": -324.94915771484375, |
|
"logps/rejected": -269.9296569824219, |
|
"loss": 0.1905, |
|
"rewards/chosen": -0.5299755334854126, |
|
"rewards/margins": 2.1768460273742676, |
|
"rewards/rejected": -2.7068214416503906, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 13.09663182243734, |
|
"kl": 0.0, |
|
"learning_rate": 4.3380429094714804e-07, |
|
"logps/chosen": -209.63999938964844, |
|
"logps/rejected": -290.00927734375, |
|
"loss": 0.1509, |
|
"rewards/chosen": 0.9980623722076416, |
|
"rewards/margins": 4.123095512390137, |
|
"rewards/rejected": -3.125032901763916, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 16.101792755673987, |
|
"kl": 0.0, |
|
"learning_rate": 4.335426478283621e-07, |
|
"logps/chosen": -178.9565887451172, |
|
"logps/rejected": -214.99896240234375, |
|
"loss": 0.1981, |
|
"rewards/chosen": -0.7419480085372925, |
|
"rewards/margins": 1.410202145576477, |
|
"rewards/rejected": -2.1521501541137695, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.260673089248163, |
|
"kl": 0.0, |
|
"learning_rate": 4.3328100470957614e-07, |
|
"logps/chosen": -269.7500915527344, |
|
"logps/rejected": -240.69114685058594, |
|
"loss": 0.1547, |
|
"rewards/chosen": -1.514892578125, |
|
"rewards/margins": 1.8574752807617188, |
|
"rewards/rejected": -3.3723678588867188, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 8.74207098415269, |
|
"kl": 0.0, |
|
"learning_rate": 4.330193615907902e-07, |
|
"logps/chosen": -238.84718322753906, |
|
"logps/rejected": -185.5977325439453, |
|
"loss": 0.2231, |
|
"rewards/chosen": 0.872572660446167, |
|
"rewards/margins": 2.413464069366455, |
|
"rewards/rejected": -1.5408915281295776, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 13.06989888703404, |
|
"kl": 0.0, |
|
"learning_rate": 4.3275771847200413e-07, |
|
"logps/chosen": -198.09527587890625, |
|
"logps/rejected": -241.32310485839844, |
|
"loss": 0.293, |
|
"rewards/chosen": -0.5270448327064514, |
|
"rewards/margins": 1.5255239009857178, |
|
"rewards/rejected": -2.0525686740875244, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.146455056162033, |
|
"kl": 0.0, |
|
"learning_rate": 4.324960753532182e-07, |
|
"logps/chosen": -158.8040771484375, |
|
"logps/rejected": -317.1865539550781, |
|
"loss": 0.1985, |
|
"rewards/chosen": -0.44321534037590027, |
|
"rewards/margins": 2.745103359222412, |
|
"rewards/rejected": -3.1883187294006348, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.496122217797181, |
|
"kl": 0.0, |
|
"learning_rate": 4.3223443223443223e-07, |
|
"logps/chosen": -195.80255126953125, |
|
"logps/rejected": -251.7202911376953, |
|
"loss": 0.2603, |
|
"rewards/chosen": 0.14385934174060822, |
|
"rewards/margins": 3.09423565864563, |
|
"rewards/rejected": -2.950376272201538, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.14234334654825, |
|
"kl": 0.0, |
|
"learning_rate": 4.3197278911564623e-07, |
|
"logps/chosen": -194.34381103515625, |
|
"logps/rejected": -210.70053100585938, |
|
"loss": 0.2247, |
|
"rewards/chosen": -0.5755199193954468, |
|
"rewards/margins": 1.121391773223877, |
|
"rewards/rejected": -1.6969116926193237, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.972480345195148, |
|
"kl": 0.0, |
|
"learning_rate": 4.317111459968603e-07, |
|
"logps/chosen": -190.80508422851562, |
|
"logps/rejected": -250.46139526367188, |
|
"loss": 0.2264, |
|
"rewards/chosen": 0.17243629693984985, |
|
"rewards/margins": 3.771756887435913, |
|
"rewards/rejected": -3.599320650100708, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 12.562599017075712, |
|
"kl": 0.0, |
|
"learning_rate": 4.314495028780743e-07, |
|
"logps/chosen": -251.84373474121094, |
|
"logps/rejected": -245.03099060058594, |
|
"loss": 0.1727, |
|
"rewards/chosen": 0.41944611072540283, |
|
"rewards/margins": 2.5281949043273926, |
|
"rewards/rejected": -2.1087489128112793, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.382077716702321, |
|
"kl": 0.0, |
|
"learning_rate": 4.311878597592883e-07, |
|
"logps/chosen": -213.92518615722656, |
|
"logps/rejected": -231.60177612304688, |
|
"loss": 0.2323, |
|
"rewards/chosen": -0.6165014505386353, |
|
"rewards/margins": 1.244750738143921, |
|
"rewards/rejected": -1.8612521886825562, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.731491692873679, |
|
"kl": 0.0, |
|
"learning_rate": 4.309262166405023e-07, |
|
"logps/chosen": -212.86976623535156, |
|
"logps/rejected": -261.46868896484375, |
|
"loss": 0.1888, |
|
"rewards/chosen": -0.4610190689563751, |
|
"rewards/margins": 2.1575005054473877, |
|
"rewards/rejected": -2.6185195446014404, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.409608574249868, |
|
"kl": 0.0, |
|
"learning_rate": 4.306645735217164e-07, |
|
"logps/chosen": -224.6321258544922, |
|
"logps/rejected": -252.78160095214844, |
|
"loss": 0.2112, |
|
"rewards/chosen": 0.9657794833183289, |
|
"rewards/margins": 2.610295534133911, |
|
"rewards/rejected": -1.6445159912109375, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.856888071447521, |
|
"kl": 0.0, |
|
"learning_rate": 4.3040293040293043e-07, |
|
"logps/chosen": -222.96458435058594, |
|
"logps/rejected": -214.23255920410156, |
|
"loss": 0.2125, |
|
"rewards/chosen": -0.6956588625907898, |
|
"rewards/margins": 1.7711460590362549, |
|
"rewards/rejected": -2.4668049812316895, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 12.139684536745287, |
|
"kl": 0.0, |
|
"learning_rate": 4.3014128728414437e-07, |
|
"logps/chosen": -227.04371643066406, |
|
"logps/rejected": -338.61358642578125, |
|
"loss": 0.1782, |
|
"rewards/chosen": -0.1494772583246231, |
|
"rewards/margins": 2.7432305812835693, |
|
"rewards/rejected": -2.8927078247070312, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.372427757605506, |
|
"kl": 0.0, |
|
"learning_rate": 4.298796441653584e-07, |
|
"logps/chosen": -269.498291015625, |
|
"logps/rejected": -285.0457763671875, |
|
"loss": 0.2566, |
|
"rewards/chosen": -0.7780652642250061, |
|
"rewards/margins": 2.5229103565216064, |
|
"rewards/rejected": -3.3009755611419678, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.418607245045079, |
|
"kl": 0.0, |
|
"learning_rate": 4.2961800104657247e-07, |
|
"logps/chosen": -206.6187744140625, |
|
"logps/rejected": -178.76629638671875, |
|
"loss": 0.1973, |
|
"rewards/chosen": -0.2997088134288788, |
|
"rewards/margins": 2.2873473167419434, |
|
"rewards/rejected": -2.5870561599731445, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 8.700783144862982, |
|
"kl": 0.0, |
|
"learning_rate": 4.2935635792778647e-07, |
|
"logps/chosen": -259.9353942871094, |
|
"logps/rejected": -190.39688110351562, |
|
"loss": 0.1152, |
|
"rewards/chosen": 0.6818735003471375, |
|
"rewards/margins": 2.6719298362731934, |
|
"rewards/rejected": -1.9900562763214111, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.113524020838248, |
|
"kl": 0.0, |
|
"learning_rate": 4.290947148090005e-07, |
|
"logps/chosen": -278.58087158203125, |
|
"logps/rejected": -203.67617797851562, |
|
"loss": 0.1375, |
|
"rewards/chosen": 0.04530330374836922, |
|
"rewards/margins": 3.534538507461548, |
|
"rewards/rejected": -3.4892351627349854, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 8.408405701936438, |
|
"kl": 0.0, |
|
"learning_rate": 4.288330716902145e-07, |
|
"logps/chosen": -167.8460693359375, |
|
"logps/rejected": -220.5739288330078, |
|
"loss": 0.1761, |
|
"rewards/chosen": -0.373342365026474, |
|
"rewards/margins": 2.532893657684326, |
|
"rewards/rejected": -2.906235933303833, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 13.207340486836898, |
|
"kl": 0.0, |
|
"learning_rate": 4.285714285714285e-07, |
|
"logps/chosen": -271.5782165527344, |
|
"logps/rejected": -229.74559020996094, |
|
"loss": 0.1837, |
|
"rewards/chosen": -0.2260454148054123, |
|
"rewards/margins": 1.8243522644042969, |
|
"rewards/rejected": -2.0503976345062256, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 9.722222876772891, |
|
"kl": 0.0, |
|
"learning_rate": 4.2830978545264256e-07, |
|
"logps/chosen": -263.2925109863281, |
|
"logps/rejected": -304.8329772949219, |
|
"loss": 0.1408, |
|
"rewards/chosen": -0.7855675220489502, |
|
"rewards/margins": 1.542686939239502, |
|
"rewards/rejected": -2.328254461288452, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 11.917040600069175, |
|
"kl": 0.0, |
|
"learning_rate": 4.280481423338566e-07, |
|
"logps/chosen": -200.11573791503906, |
|
"logps/rejected": -282.0390930175781, |
|
"loss": 0.1633, |
|
"rewards/chosen": 0.374490350484848, |
|
"rewards/margins": 3.0798637866973877, |
|
"rewards/rejected": -2.705373525619507, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 14.079400827374393, |
|
"kl": 0.0, |
|
"learning_rate": 4.2778649921507067e-07, |
|
"logps/chosen": -226.02078247070312, |
|
"logps/rejected": -285.05780029296875, |
|
"loss": 0.239, |
|
"rewards/chosen": 0.22204646468162537, |
|
"rewards/margins": 2.745478630065918, |
|
"rewards/rejected": -2.5234322547912598, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 12.40231967594057, |
|
"kl": 0.0, |
|
"learning_rate": 4.2752485609628466e-07, |
|
"logps/chosen": -233.96995544433594, |
|
"logps/rejected": -245.3420867919922, |
|
"loss": 0.2526, |
|
"rewards/chosen": -0.20722751319408417, |
|
"rewards/margins": 1.8177690505981445, |
|
"rewards/rejected": -2.024996519088745, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 11.912714615917263, |
|
"kl": 0.0, |
|
"learning_rate": 4.2726321297749866e-07, |
|
"logps/chosen": -242.40673828125, |
|
"logps/rejected": -333.7016906738281, |
|
"loss": 0.2233, |
|
"rewards/chosen": -0.6541883945465088, |
|
"rewards/margins": 1.2289773225784302, |
|
"rewards/rejected": -1.883165717124939, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 12.620911514266433, |
|
"kl": 0.0, |
|
"learning_rate": 4.270015698587127e-07, |
|
"logps/chosen": -158.87258911132812, |
|
"logps/rejected": -180.81192016601562, |
|
"loss": 0.1446, |
|
"rewards/chosen": -0.11502157896757126, |
|
"rewards/margins": 1.9224528074264526, |
|
"rewards/rejected": -2.0374743938446045, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 11.601791307438955, |
|
"kl": 0.0, |
|
"learning_rate": 4.267399267399267e-07, |
|
"logps/chosen": -245.42486572265625, |
|
"logps/rejected": -260.4696960449219, |
|
"loss": 0.1418, |
|
"rewards/chosen": 0.19518660008907318, |
|
"rewards/margins": 2.677379846572876, |
|
"rewards/rejected": -2.4821932315826416, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 12.839705365539672, |
|
"kl": 0.0, |
|
"learning_rate": 4.2647828362114076e-07, |
|
"logps/chosen": -256.3037109375, |
|
"logps/rejected": -272.7105712890625, |
|
"loss": 0.2253, |
|
"rewards/chosen": 0.4001716375350952, |
|
"rewards/margins": 2.9852728843688965, |
|
"rewards/rejected": -2.585101366043091, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 11.039693610954819, |
|
"kl": 0.0, |
|
"learning_rate": 4.262166405023548e-07, |
|
"logps/chosen": -256.00457763671875, |
|
"logps/rejected": -206.69845581054688, |
|
"loss": 0.1776, |
|
"rewards/chosen": 0.3543960452079773, |
|
"rewards/margins": 2.914128303527832, |
|
"rewards/rejected": -2.55973219871521, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.759460114772729, |
|
"kl": 0.0, |
|
"learning_rate": 4.2595499738356875e-07, |
|
"logps/chosen": -273.74395751953125, |
|
"logps/rejected": -195.93484497070312, |
|
"loss": 0.2458, |
|
"rewards/chosen": 1.1952847242355347, |
|
"rewards/margins": 2.3585853576660156, |
|
"rewards/rejected": -1.1633005142211914, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 10.249902760795546, |
|
"kl": 0.0, |
|
"learning_rate": 4.256933542647828e-07, |
|
"logps/chosen": -250.92709350585938, |
|
"logps/rejected": -251.54632568359375, |
|
"loss": 0.2445, |
|
"rewards/chosen": -1.2972968816757202, |
|
"rewards/margins": 1.1200555562973022, |
|
"rewards/rejected": -2.4173524379730225, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 11.519889613409475, |
|
"kl": 0.0, |
|
"learning_rate": 4.2543171114599685e-07, |
|
"logps/chosen": -241.36473083496094, |
|
"logps/rejected": -217.71559143066406, |
|
"loss": 0.1655, |
|
"rewards/chosen": -0.08067867159843445, |
|
"rewards/margins": 2.2561678886413574, |
|
"rewards/rejected": -2.3368465900421143, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 12.116227642012538, |
|
"kl": 0.0, |
|
"learning_rate": 4.2517006802721085e-07, |
|
"logps/chosen": -227.16310119628906, |
|
"logps/rejected": -254.51699829101562, |
|
"loss": 0.1795, |
|
"rewards/chosen": -0.5490508675575256, |
|
"rewards/margins": 2.050340175628662, |
|
"rewards/rejected": -2.599390983581543, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 10.761723965602993, |
|
"kl": 0.0, |
|
"learning_rate": 4.249084249084249e-07, |
|
"logps/chosen": -213.15887451171875, |
|
"logps/rejected": -229.43199157714844, |
|
"loss": 0.2217, |
|
"rewards/chosen": -0.36798590421676636, |
|
"rewards/margins": 0.6998893618583679, |
|
"rewards/rejected": -1.0678752660751343, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.96614506259357, |
|
"kl": 0.0, |
|
"learning_rate": 4.246467817896389e-07, |
|
"logps/chosen": -224.59458923339844, |
|
"logps/rejected": -168.9609375, |
|
"loss": 0.2067, |
|
"rewards/chosen": 0.40960344672203064, |
|
"rewards/margins": 1.400376319885254, |
|
"rewards/rejected": -0.9907728433609009, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 10.590063389968323, |
|
"kl": 0.0, |
|
"learning_rate": 4.2438513867085295e-07, |
|
"logps/chosen": -230.34326171875, |
|
"logps/rejected": -194.3732452392578, |
|
"loss": 0.1615, |
|
"rewards/chosen": -0.03669742867350578, |
|
"rewards/margins": 1.9274276494979858, |
|
"rewards/rejected": -1.9641250371932983, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 12.341220101671244, |
|
"kl": 0.0, |
|
"learning_rate": 4.2412349555206695e-07, |
|
"logps/chosen": -261.1521911621094, |
|
"logps/rejected": -184.7381591796875, |
|
"loss": 0.1907, |
|
"rewards/chosen": 0.5180913209915161, |
|
"rewards/margins": 1.7361607551574707, |
|
"rewards/rejected": -1.2180694341659546, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 13.945755502648126, |
|
"kl": 0.0, |
|
"learning_rate": 4.23861852433281e-07, |
|
"logps/chosen": -318.291015625, |
|
"logps/rejected": -216.2500457763672, |
|
"loss": 0.2175, |
|
"rewards/chosen": -0.4215258061885834, |
|
"rewards/margins": 0.7515729665756226, |
|
"rewards/rejected": -1.1730988025665283, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 11.888573752629188, |
|
"kl": 0.0, |
|
"learning_rate": 4.2360020931449505e-07, |
|
"logps/chosen": -213.46910095214844, |
|
"logps/rejected": -188.46728515625, |
|
"loss": 0.2024, |
|
"rewards/chosen": 0.058938559144735336, |
|
"rewards/margins": 1.171943187713623, |
|
"rewards/rejected": -1.1130046844482422, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 16.17419414361992, |
|
"kl": 0.0, |
|
"learning_rate": 4.23338566195709e-07, |
|
"logps/chosen": -192.65357971191406, |
|
"logps/rejected": -261.0958251953125, |
|
"loss": 0.2653, |
|
"rewards/chosen": 0.42944175004959106, |
|
"rewards/margins": 1.398432970046997, |
|
"rewards/rejected": -0.9689911603927612, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.577005409188526, |
|
"kl": 0.0, |
|
"learning_rate": 4.2307692307692304e-07, |
|
"logps/chosen": -214.690185546875, |
|
"logps/rejected": -195.5751953125, |
|
"loss": 0.1916, |
|
"rewards/chosen": 0.16525867581367493, |
|
"rewards/margins": 1.8478789329528809, |
|
"rewards/rejected": -1.6826202869415283, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 13.024241787173485, |
|
"kl": 0.0, |
|
"learning_rate": 4.228152799581371e-07, |
|
"logps/chosen": -269.0843505859375, |
|
"logps/rejected": -258.8379821777344, |
|
"loss": 0.1626, |
|
"rewards/chosen": 0.0963350236415863, |
|
"rewards/margins": 1.255967617034912, |
|
"rewards/rejected": -1.1596325635910034, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 8.85778497897942, |
|
"kl": 0.0, |
|
"learning_rate": 4.225536368393511e-07, |
|
"logps/chosen": -217.76644897460938, |
|
"logps/rejected": -309.886962890625, |
|
"loss": 0.112, |
|
"rewards/chosen": 0.8563817739486694, |
|
"rewards/margins": 3.093313217163086, |
|
"rewards/rejected": -2.236931562423706, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 12.645142193880144, |
|
"kl": 0.0, |
|
"learning_rate": 4.2229199372056514e-07, |
|
"logps/chosen": -181.10629272460938, |
|
"logps/rejected": -232.3704071044922, |
|
"loss": 0.2186, |
|
"rewards/chosen": -0.1876239776611328, |
|
"rewards/margins": 1.843381643295288, |
|
"rewards/rejected": -2.031005620956421, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.248387892063585, |
|
"kl": 0.0, |
|
"learning_rate": 4.2203035060177914e-07, |
|
"logps/chosen": -248.957763671875, |
|
"logps/rejected": -215.41151428222656, |
|
"loss": 0.1798, |
|
"rewards/chosen": 0.26162558794021606, |
|
"rewards/margins": 2.0767014026641846, |
|
"rewards/rejected": -1.8150758743286133, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 13.744936576539429, |
|
"kl": 0.0, |
|
"learning_rate": 4.217687074829932e-07, |
|
"logps/chosen": -221.40310668945312, |
|
"logps/rejected": -279.14703369140625, |
|
"loss": 0.2128, |
|
"rewards/chosen": 0.0988471731543541, |
|
"rewards/margins": 2.282918691635132, |
|
"rewards/rejected": -2.1840715408325195, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.139613060769499, |
|
"kl": 0.0, |
|
"learning_rate": 4.215070643642072e-07, |
|
"logps/chosen": -219.13638305664062, |
|
"logps/rejected": -316.4018249511719, |
|
"loss": 0.1851, |
|
"rewards/chosen": -0.3484467566013336, |
|
"rewards/margins": 2.9719343185424805, |
|
"rewards/rejected": -3.3203811645507812, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.23566454390853, |
|
"kl": 0.0, |
|
"learning_rate": 4.2124542124542123e-07, |
|
"logps/chosen": -264.99456787109375, |
|
"logps/rejected": -212.24009704589844, |
|
"loss": 0.2013, |
|
"rewards/chosen": 0.5730603337287903, |
|
"rewards/margins": 2.691094398498535, |
|
"rewards/rejected": -2.1180341243743896, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 9.57651530765622, |
|
"kl": 0.0, |
|
"learning_rate": 4.209837781266353e-07, |
|
"logps/chosen": -220.54957580566406, |
|
"logps/rejected": -235.98797607421875, |
|
"loss": 0.1582, |
|
"rewards/chosen": 0.9783976674079895, |
|
"rewards/margins": 2.1505749225616455, |
|
"rewards/rejected": -1.1721771955490112, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.869078446374544, |
|
"kl": 0.0, |
|
"learning_rate": 4.2072213500784923e-07, |
|
"logps/chosen": -275.3968505859375, |
|
"logps/rejected": -187.9066925048828, |
|
"loss": 0.1583, |
|
"rewards/chosen": 0.454397588968277, |
|
"rewards/margins": 1.851892352104187, |
|
"rewards/rejected": -1.3974947929382324, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.706252453621756, |
|
"kl": 0.0, |
|
"learning_rate": 4.204604918890633e-07, |
|
"logps/chosen": -310.59661865234375, |
|
"logps/rejected": -275.7345275878906, |
|
"loss": 0.2664, |
|
"rewards/chosen": -0.8155102133750916, |
|
"rewards/margins": 0.8342878222465515, |
|
"rewards/rejected": -1.649798035621643, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.279199741590181, |
|
"kl": 0.0, |
|
"learning_rate": 4.2019884877027733e-07, |
|
"logps/chosen": -238.77163696289062, |
|
"logps/rejected": -267.1834716796875, |
|
"loss": 0.1667, |
|
"rewards/chosen": 0.08637741953134537, |
|
"rewards/margins": 2.7349460124969482, |
|
"rewards/rejected": -2.648568630218506, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.974707777807534, |
|
"kl": 0.0, |
|
"learning_rate": 4.1993720565149133e-07, |
|
"logps/chosen": -249.7024383544922, |
|
"logps/rejected": -290.3708190917969, |
|
"loss": 0.2169, |
|
"rewards/chosen": 0.3819645047187805, |
|
"rewards/margins": 2.038438558578491, |
|
"rewards/rejected": -1.6564741134643555, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 15.241423459831774, |
|
"kl": 0.0, |
|
"learning_rate": 4.196755625327054e-07, |
|
"logps/chosen": -256.07049560546875, |
|
"logps/rejected": -295.1194152832031, |
|
"loss": 0.1785, |
|
"rewards/chosen": 0.4329548180103302, |
|
"rewards/margins": 2.4334588050842285, |
|
"rewards/rejected": -2.0005040168762207, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 12.073965597469332, |
|
"kl": 0.0, |
|
"learning_rate": 4.1941391941391943e-07, |
|
"logps/chosen": -233.57545471191406, |
|
"logps/rejected": -308.6473388671875, |
|
"loss": 0.1959, |
|
"rewards/chosen": -0.34189051389694214, |
|
"rewards/margins": 1.3108210563659668, |
|
"rewards/rejected": -1.6527115106582642, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.835558830180704, |
|
"kl": 0.0, |
|
"learning_rate": 4.191522762951334e-07, |
|
"logps/chosen": -221.1939239501953, |
|
"logps/rejected": -216.98193359375, |
|
"loss": 0.1861, |
|
"rewards/chosen": 0.3302954435348511, |
|
"rewards/margins": 1.9516035318374634, |
|
"rewards/rejected": -1.6213080883026123, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.5831850564, |
|
"kl": 0.0, |
|
"learning_rate": 4.188906331763474e-07, |
|
"logps/chosen": -193.68711853027344, |
|
"logps/rejected": -257.5124206542969, |
|
"loss": 0.2299, |
|
"rewards/chosen": -0.3544810116291046, |
|
"rewards/margins": 1.7205111980438232, |
|
"rewards/rejected": -2.0749921798706055, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 12.791438944238683, |
|
"kl": 0.0, |
|
"learning_rate": 4.1862899005756147e-07, |
|
"logps/chosen": -259.3917236328125, |
|
"logps/rejected": -346.5878601074219, |
|
"loss": 0.2063, |
|
"rewards/chosen": -1.0072615146636963, |
|
"rewards/margins": 0.5092824697494507, |
|
"rewards/rejected": -1.516543984413147, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 8.990082192323051, |
|
"kl": 0.0, |
|
"learning_rate": 4.183673469387755e-07, |
|
"logps/chosen": -185.4383544921875, |
|
"logps/rejected": -225.95294189453125, |
|
"loss": 0.1446, |
|
"rewards/chosen": 0.1784064769744873, |
|
"rewards/margins": 2.15664005279541, |
|
"rewards/rejected": -1.9782336950302124, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 9.526272579899544, |
|
"kl": 0.0, |
|
"learning_rate": 4.181057038199895e-07, |
|
"logps/chosen": -220.56004333496094, |
|
"logps/rejected": -190.05809020996094, |
|
"loss": 0.1914, |
|
"rewards/chosen": 1.1748487949371338, |
|
"rewards/margins": 3.3340234756469727, |
|
"rewards/rejected": -2.159174680709839, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 11.139585767174607, |
|
"kl": 0.0, |
|
"learning_rate": 4.178440607012035e-07, |
|
"logps/chosen": -268.95770263671875, |
|
"logps/rejected": -222.8634033203125, |
|
"loss": 0.2051, |
|
"rewards/chosen": -0.40160444378852844, |
|
"rewards/margins": 2.409646987915039, |
|
"rewards/rejected": -2.811251401901245, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.949601505354943, |
|
"kl": 0.0, |
|
"learning_rate": 4.1758241758241757e-07, |
|
"logps/chosen": -186.237060546875, |
|
"logps/rejected": -245.27206420898438, |
|
"loss": 0.249, |
|
"rewards/chosen": -0.8131721019744873, |
|
"rewards/margins": 1.2247133255004883, |
|
"rewards/rejected": -2.0378854274749756, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.351891085819371, |
|
"kl": 0.0, |
|
"learning_rate": 4.1732077446363157e-07, |
|
"logps/chosen": -201.30223083496094, |
|
"logps/rejected": -243.75341796875, |
|
"loss": 0.1895, |
|
"rewards/chosen": 0.3100842535495758, |
|
"rewards/margins": 2.5161705017089844, |
|
"rewards/rejected": -2.2060861587524414, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.221976691002848, |
|
"kl": 0.0, |
|
"learning_rate": 4.170591313448456e-07, |
|
"logps/chosen": -232.7662353515625, |
|
"logps/rejected": -362.24041748046875, |
|
"loss": 0.1972, |
|
"rewards/chosen": 0.6142377853393555, |
|
"rewards/margins": 3.4600791931152344, |
|
"rewards/rejected": -2.845841407775879, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 10.754382204616256, |
|
"kl": 0.0, |
|
"learning_rate": 4.1679748822605967e-07, |
|
"logps/chosen": -215.42227172851562, |
|
"logps/rejected": -278.8962707519531, |
|
"loss": 0.1188, |
|
"rewards/chosen": 0.1050441712141037, |
|
"rewards/margins": 3.3125483989715576, |
|
"rewards/rejected": -3.2075042724609375, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.987540188284319, |
|
"kl": 0.0, |
|
"learning_rate": 4.1653584510727366e-07, |
|
"logps/chosen": -244.31643676757812, |
|
"logps/rejected": -323.1125183105469, |
|
"loss": 0.2359, |
|
"rewards/chosen": -0.4736923575401306, |
|
"rewards/margins": 2.1419007778167725, |
|
"rewards/rejected": -2.615593194961548, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.80349986892105, |
|
"kl": 0.0, |
|
"learning_rate": 4.1627420198848766e-07, |
|
"logps/chosen": -223.25929260253906, |
|
"logps/rejected": -236.41934204101562, |
|
"loss": 0.1387, |
|
"rewards/chosen": -0.46407273411750793, |
|
"rewards/margins": 1.7442665100097656, |
|
"rewards/rejected": -2.208339214324951, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 13.1846568437176, |
|
"kl": 0.0, |
|
"learning_rate": 4.160125588697017e-07, |
|
"logps/chosen": -213.67198181152344, |
|
"logps/rejected": -295.5606384277344, |
|
"loss": 0.2656, |
|
"rewards/chosen": -0.33450353145599365, |
|
"rewards/margins": 1.9383219480514526, |
|
"rewards/rejected": -2.2728254795074463, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 13.253044198831535, |
|
"kl": 0.0, |
|
"learning_rate": 4.1575091575091576e-07, |
|
"logps/chosen": -231.430908203125, |
|
"logps/rejected": -249.1555938720703, |
|
"loss": 0.1841, |
|
"rewards/chosen": 0.48544788360595703, |
|
"rewards/margins": 1.7013275623321533, |
|
"rewards/rejected": -1.2158796787261963, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.874190668515645, |
|
"kl": 0.0, |
|
"learning_rate": 4.1548927263212976e-07, |
|
"logps/chosen": -208.916015625, |
|
"logps/rejected": -248.04067993164062, |
|
"loss": 0.1922, |
|
"rewards/chosen": -0.29332485795021057, |
|
"rewards/margins": 2.6810503005981445, |
|
"rewards/rejected": -2.9743752479553223, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 9.788377245433486, |
|
"kl": 0.0, |
|
"learning_rate": 4.1522762951334376e-07, |
|
"logps/chosen": -237.29441833496094, |
|
"logps/rejected": -246.01272583007812, |
|
"loss": 0.2483, |
|
"rewards/chosen": -1.0158889293670654, |
|
"rewards/margins": 2.2013275623321533, |
|
"rewards/rejected": -3.2172164916992188, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 10.674006139111874, |
|
"kl": 0.0, |
|
"learning_rate": 4.149659863945578e-07, |
|
"logps/chosen": -240.6201171875, |
|
"logps/rejected": -255.26345825195312, |
|
"loss": 0.2101, |
|
"rewards/chosen": -0.13020078837871552, |
|
"rewards/margins": 3.523922920227051, |
|
"rewards/rejected": -3.6541237831115723, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 10.0670473259245, |
|
"kl": 0.0, |
|
"learning_rate": 4.147043432757718e-07, |
|
"logps/chosen": -229.05381774902344, |
|
"logps/rejected": -261.8768005371094, |
|
"loss": 0.1158, |
|
"rewards/chosen": -0.555793821811676, |
|
"rewards/margins": 3.118558645248413, |
|
"rewards/rejected": -3.6743524074554443, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.521982319858306, |
|
"kl": 0.0, |
|
"learning_rate": 4.1444270015698585e-07, |
|
"logps/chosen": -220.83934020996094, |
|
"logps/rejected": -249.3174285888672, |
|
"loss": 0.1808, |
|
"rewards/chosen": 0.1714603751897812, |
|
"rewards/margins": 2.402540922164917, |
|
"rewards/rejected": -2.2310805320739746, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.228304372001576, |
|
"kl": 0.0, |
|
"learning_rate": 4.141810570381999e-07, |
|
"logps/chosen": -286.3045654296875, |
|
"logps/rejected": -269.97601318359375, |
|
"loss": 0.1993, |
|
"rewards/chosen": -0.551383912563324, |
|
"rewards/margins": 1.8383245468139648, |
|
"rewards/rejected": -2.3897085189819336, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.897560403128791, |
|
"kl": 0.0, |
|
"learning_rate": 4.1391941391941385e-07, |
|
"logps/chosen": -154.19729614257812, |
|
"logps/rejected": -238.20199584960938, |
|
"loss": 0.2342, |
|
"rewards/chosen": 0.1875210553407669, |
|
"rewards/margins": 1.9252369403839111, |
|
"rewards/rejected": -1.7377158403396606, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 10.436673403488303, |
|
"kl": 0.0, |
|
"learning_rate": 4.136577708006279e-07, |
|
"logps/chosen": -209.2281036376953, |
|
"logps/rejected": -269.2527160644531, |
|
"loss": 0.1088, |
|
"rewards/chosen": 0.6344658732414246, |
|
"rewards/margins": 3.8878352642059326, |
|
"rewards/rejected": -3.2533693313598633, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 8.255234754549457, |
|
"kl": 0.0, |
|
"learning_rate": 4.1339612768184195e-07, |
|
"logps/chosen": -212.30836486816406, |
|
"logps/rejected": -201.14207458496094, |
|
"loss": 0.174, |
|
"rewards/chosen": 0.6615203619003296, |
|
"rewards/margins": 2.292316436767578, |
|
"rewards/rejected": -1.630796194076538, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.884943779747632, |
|
"kl": 0.0, |
|
"learning_rate": 4.13134484563056e-07, |
|
"logps/chosen": -211.07350158691406, |
|
"logps/rejected": -211.02023315429688, |
|
"loss": 0.2495, |
|
"rewards/chosen": 0.40837398171424866, |
|
"rewards/margins": 2.4228506088256836, |
|
"rewards/rejected": -2.0144765377044678, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 11.994293665472632, |
|
"kl": 0.0, |
|
"learning_rate": 4.1287284144427e-07, |
|
"logps/chosen": -289.47589111328125, |
|
"logps/rejected": -228.80950927734375, |
|
"loss": 0.1505, |
|
"rewards/chosen": -0.011884563602507114, |
|
"rewards/margins": 1.357936143875122, |
|
"rewards/rejected": -1.3698207139968872, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.497465243458791, |
|
"kl": 0.0, |
|
"learning_rate": 4.1261119832548405e-07, |
|
"logps/chosen": -214.80462646484375, |
|
"logps/rejected": -285.0194396972656, |
|
"loss": 0.2098, |
|
"rewards/chosen": -0.4318108856678009, |
|
"rewards/margins": 1.8095088005065918, |
|
"rewards/rejected": -2.2413196563720703, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.597201422275237, |
|
"kl": 0.0, |
|
"learning_rate": 4.1234955520669805e-07, |
|
"logps/chosen": -173.9023895263672, |
|
"logps/rejected": -242.03558349609375, |
|
"loss": 0.1763, |
|
"rewards/chosen": -0.03515807166695595, |
|
"rewards/margins": 1.9462581872940063, |
|
"rewards/rejected": -1.9814162254333496, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.29563689087554, |
|
"kl": 0.0, |
|
"learning_rate": 4.1208791208791204e-07, |
|
"logps/chosen": -224.08148193359375, |
|
"logps/rejected": -253.4733123779297, |
|
"loss": 0.1798, |
|
"rewards/chosen": 0.09991315752267838, |
|
"rewards/margins": 2.7761764526367188, |
|
"rewards/rejected": -2.6762633323669434, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.798183455958176, |
|
"kl": 0.0, |
|
"learning_rate": 4.118262689691261e-07, |
|
"logps/chosen": -226.20651245117188, |
|
"logps/rejected": -261.2646789550781, |
|
"loss": 0.2247, |
|
"rewards/chosen": -0.36128562688827515, |
|
"rewards/margins": 2.0138986110687256, |
|
"rewards/rejected": -2.3751842975616455, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.59217786675228, |
|
"kl": 0.0, |
|
"learning_rate": 4.1156462585034014e-07, |
|
"logps/chosen": -191.44393920898438, |
|
"logps/rejected": -222.71336364746094, |
|
"loss": 0.2218, |
|
"rewards/chosen": -0.10784044116735458, |
|
"rewards/margins": 1.7394548654556274, |
|
"rewards/rejected": -1.8472952842712402, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.929264582178304, |
|
"kl": 0.0, |
|
"learning_rate": 4.1130298273155414e-07, |
|
"logps/chosen": -211.10133361816406, |
|
"logps/rejected": -235.98374938964844, |
|
"loss": 0.1557, |
|
"rewards/chosen": -0.17055515944957733, |
|
"rewards/margins": 3.654494285583496, |
|
"rewards/rejected": -3.82504940032959, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.576617441384204, |
|
"kl": 0.0, |
|
"learning_rate": 4.1104133961276814e-07, |
|
"logps/chosen": -245.1005096435547, |
|
"logps/rejected": -265.1275939941406, |
|
"loss": 0.1974, |
|
"rewards/chosen": 0.29450762271881104, |
|
"rewards/margins": 1.8650548458099365, |
|
"rewards/rejected": -1.5705472230911255, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.893258406638443, |
|
"kl": 0.0, |
|
"learning_rate": 4.107796964939822e-07, |
|
"logps/chosen": -283.796630859375, |
|
"logps/rejected": -255.04974365234375, |
|
"loss": 0.1138, |
|
"rewards/chosen": 1.2097806930541992, |
|
"rewards/margins": 3.861506223678589, |
|
"rewards/rejected": -2.6517255306243896, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.442037096083547, |
|
"kl": 0.0, |
|
"learning_rate": 4.1051805337519624e-07, |
|
"logps/chosen": -193.82659912109375, |
|
"logps/rejected": -236.00262451171875, |
|
"loss": 0.169, |
|
"rewards/chosen": 0.25398653745651245, |
|
"rewards/margins": 3.0244903564453125, |
|
"rewards/rejected": -2.7705037593841553, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.498722183093273, |
|
"kl": 0.0, |
|
"learning_rate": 4.1025641025641024e-07, |
|
"logps/chosen": -337.9830017089844, |
|
"logps/rejected": -247.3750457763672, |
|
"loss": 0.1899, |
|
"rewards/chosen": -1.3225940465927124, |
|
"rewards/margins": 1.0586594343185425, |
|
"rewards/rejected": -2.381253480911255, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.172536259980642, |
|
"kl": 0.0, |
|
"learning_rate": 4.099947671376243e-07, |
|
"logps/chosen": -250.67410278320312, |
|
"logps/rejected": -271.3428039550781, |
|
"loss": 0.2109, |
|
"rewards/chosen": -0.8246206045150757, |
|
"rewards/margins": 2.084084987640381, |
|
"rewards/rejected": -2.908705472946167, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 12.383051195247807, |
|
"kl": 0.0, |
|
"learning_rate": 4.097331240188383e-07, |
|
"logps/chosen": -263.4064025878906, |
|
"logps/rejected": -282.1274108886719, |
|
"loss": 0.2069, |
|
"rewards/chosen": -0.49988675117492676, |
|
"rewards/margins": 1.9791779518127441, |
|
"rewards/rejected": -2.479064702987671, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.923267130594034, |
|
"kl": 0.0, |
|
"learning_rate": 4.094714809000523e-07, |
|
"logps/chosen": -215.8848419189453, |
|
"logps/rejected": -253.63076782226562, |
|
"loss": 0.1745, |
|
"rewards/chosen": -0.4700072109699249, |
|
"rewards/margins": 1.989243507385254, |
|
"rewards/rejected": -2.4592506885528564, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 8.982041772561747, |
|
"kl": 0.0, |
|
"learning_rate": 4.0920983778126633e-07, |
|
"logps/chosen": -225.7939910888672, |
|
"logps/rejected": -244.41358947753906, |
|
"loss": 0.1869, |
|
"rewards/chosen": -1.364567756652832, |
|
"rewards/margins": 0.951836109161377, |
|
"rewards/rejected": -2.316403865814209, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.837403601758242, |
|
"kl": 0.0, |
|
"learning_rate": 4.089481946624804e-07, |
|
"logps/chosen": -249.8412628173828, |
|
"logps/rejected": -241.59420776367188, |
|
"loss": 0.1471, |
|
"rewards/chosen": -0.19725684821605682, |
|
"rewards/margins": 2.119018077850342, |
|
"rewards/rejected": -2.316274881362915, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.217732507310354, |
|
"kl": 0.0, |
|
"learning_rate": 4.086865515436944e-07, |
|
"logps/chosen": -257.2194519042969, |
|
"logps/rejected": -180.22837829589844, |
|
"loss": 0.1527, |
|
"rewards/chosen": 0.24126391112804413, |
|
"rewards/margins": 2.3532466888427734, |
|
"rewards/rejected": -2.111982822418213, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 10.502546710908188, |
|
"kl": 0.0, |
|
"learning_rate": 4.084249084249084e-07, |
|
"logps/chosen": -227.2938232421875, |
|
"logps/rejected": -300.4139709472656, |
|
"loss": 0.1961, |
|
"rewards/chosen": -0.11248449236154556, |
|
"rewards/margins": 2.296675205230713, |
|
"rewards/rejected": -2.4091596603393555, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.149043285356687, |
|
"kl": 0.0, |
|
"learning_rate": 4.0816326530612243e-07, |
|
"logps/chosen": -235.18333435058594, |
|
"logps/rejected": -206.1359100341797, |
|
"loss": 0.1829, |
|
"rewards/chosen": 0.5669969320297241, |
|
"rewards/margins": 3.09303617477417, |
|
"rewards/rejected": -2.5260391235351562, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.899081712479965, |
|
"kl": 0.0, |
|
"learning_rate": 4.079016221873365e-07, |
|
"logps/chosen": -247.81387329101562, |
|
"logps/rejected": -268.5088195800781, |
|
"loss": 0.2918, |
|
"rewards/chosen": -0.36709415912628174, |
|
"rewards/margins": 0.9215909242630005, |
|
"rewards/rejected": -1.2886850833892822, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.107384586901443, |
|
"kl": 0.0, |
|
"learning_rate": 4.076399790685505e-07, |
|
"logps/chosen": -206.99432373046875, |
|
"logps/rejected": -212.78196716308594, |
|
"loss": 0.2387, |
|
"rewards/chosen": -0.985337495803833, |
|
"rewards/margins": 1.2888743877410889, |
|
"rewards/rejected": -2.274211883544922, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.235471285138322, |
|
"kl": 0.0, |
|
"learning_rate": 4.073783359497645e-07, |
|
"logps/chosen": -236.5115203857422, |
|
"logps/rejected": -272.5962219238281, |
|
"loss": 0.1737, |
|
"rewards/chosen": -0.7294275164604187, |
|
"rewards/margins": 2.0560388565063477, |
|
"rewards/rejected": -2.785466432571411, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.272141410111164, |
|
"kl": 0.0, |
|
"learning_rate": 4.071166928309785e-07, |
|
"logps/chosen": -222.7964324951172, |
|
"logps/rejected": -287.06475830078125, |
|
"loss": 0.1308, |
|
"rewards/chosen": 0.3911401629447937, |
|
"rewards/margins": 2.989321231842041, |
|
"rewards/rejected": -2.5981810092926025, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.964274506087736, |
|
"kl": 0.0, |
|
"learning_rate": 4.068550497121925e-07, |
|
"logps/chosen": -170.38925170898438, |
|
"logps/rejected": -205.2819366455078, |
|
"loss": 0.1071, |
|
"rewards/chosen": -0.18005786836147308, |
|
"rewards/margins": 2.6861469745635986, |
|
"rewards/rejected": -2.8662047386169434, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.501254991289231, |
|
"kl": 0.0, |
|
"learning_rate": 4.0659340659340657e-07, |
|
"logps/chosen": -246.70559692382812, |
|
"logps/rejected": -238.14466857910156, |
|
"loss": 0.1549, |
|
"rewards/chosen": 0.33830830454826355, |
|
"rewards/margins": 2.2392632961273193, |
|
"rewards/rejected": -1.9009549617767334, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.23573665557948, |
|
"kl": 0.0, |
|
"learning_rate": 4.063317634746206e-07, |
|
"logps/chosen": -200.4901123046875, |
|
"logps/rejected": -207.58871459960938, |
|
"loss": 0.1599, |
|
"rewards/chosen": 1.0193722248077393, |
|
"rewards/margins": 2.878417491912842, |
|
"rewards/rejected": -1.859045147895813, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.84162361137601, |
|
"kl": 0.0, |
|
"learning_rate": 4.060701203558346e-07, |
|
"logps/chosen": -253.09637451171875, |
|
"logps/rejected": -247.7617645263672, |
|
"loss": 0.2949, |
|
"rewards/chosen": -1.3407307863235474, |
|
"rewards/margins": 0.6386681795120239, |
|
"rewards/rejected": -1.9793989658355713, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.38802746375023, |
|
"kl": 0.0, |
|
"learning_rate": 4.058084772370486e-07, |
|
"logps/chosen": -241.19613647460938, |
|
"logps/rejected": -237.13682556152344, |
|
"loss": 0.1108, |
|
"rewards/chosen": -0.31301483511924744, |
|
"rewards/margins": 2.1045053005218506, |
|
"rewards/rejected": -2.417520046234131, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.348300253030986, |
|
"kl": 0.0, |
|
"learning_rate": 4.0554683411826267e-07, |
|
"logps/chosen": -253.8113555908203, |
|
"logps/rejected": -250.69007873535156, |
|
"loss": 0.2231, |
|
"rewards/chosen": -0.6661314964294434, |
|
"rewards/margins": 1.135571002960205, |
|
"rewards/rejected": -1.8017024993896484, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.813621316611632, |
|
"kl": 0.0, |
|
"learning_rate": 4.052851909994767e-07, |
|
"logps/chosen": -183.4186248779297, |
|
"logps/rejected": -226.8902130126953, |
|
"loss": 0.2452, |
|
"rewards/chosen": 0.09793578833341599, |
|
"rewards/margins": 3.1266567707061768, |
|
"rewards/rejected": -3.0287210941314697, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.410184074704667, |
|
"kl": 0.0, |
|
"learning_rate": 4.050235478806907e-07, |
|
"logps/chosen": -218.2381134033203, |
|
"logps/rejected": -223.76805114746094, |
|
"loss": 0.2223, |
|
"rewards/chosen": -0.6022669672966003, |
|
"rewards/margins": 2.0103299617767334, |
|
"rewards/rejected": -2.6125969886779785, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.01700363931878, |
|
"kl": 0.0, |
|
"learning_rate": 4.0476190476190476e-07, |
|
"logps/chosen": -232.75802612304688, |
|
"logps/rejected": -237.35382080078125, |
|
"loss": 0.2363, |
|
"rewards/chosen": 0.516566812992096, |
|
"rewards/margins": 2.019991874694824, |
|
"rewards/rejected": -1.5034250020980835, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.262930905107156, |
|
"kl": 0.0, |
|
"learning_rate": 4.045002616431188e-07, |
|
"logps/chosen": -204.19009399414062, |
|
"logps/rejected": -235.92129516601562, |
|
"loss": 0.1872, |
|
"rewards/chosen": 0.05859723314642906, |
|
"rewards/margins": 2.741896629333496, |
|
"rewards/rejected": -2.6832993030548096, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 11.30426818476053, |
|
"kl": 0.0, |
|
"learning_rate": 4.0423861852433276e-07, |
|
"logps/chosen": -166.32923889160156, |
|
"logps/rejected": -281.9306945800781, |
|
"loss": 0.2366, |
|
"rewards/chosen": -0.21419693529605865, |
|
"rewards/margins": 2.5291309356689453, |
|
"rewards/rejected": -2.7433278560638428, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.791375928218388, |
|
"kl": 0.0, |
|
"learning_rate": 4.039769754055468e-07, |
|
"logps/chosen": -174.7226104736328, |
|
"logps/rejected": -268.91180419921875, |
|
"loss": 0.1773, |
|
"rewards/chosen": 0.03391220420598984, |
|
"rewards/margins": 3.911714792251587, |
|
"rewards/rejected": -3.877802610397339, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.229778476365636, |
|
"kl": 0.0, |
|
"learning_rate": 4.0371533228676086e-07, |
|
"logps/chosen": -227.41676330566406, |
|
"logps/rejected": -253.11061096191406, |
|
"loss": 0.2316, |
|
"rewards/chosen": -1.3165072202682495, |
|
"rewards/margins": 2.440537452697754, |
|
"rewards/rejected": -3.757044553756714, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.70449239918135, |
|
"kl": 0.0, |
|
"learning_rate": 4.0345368916797486e-07, |
|
"logps/chosen": -206.78543090820312, |
|
"logps/rejected": -193.22666931152344, |
|
"loss": 0.152, |
|
"rewards/chosen": -0.005291328299790621, |
|
"rewards/margins": 2.320930004119873, |
|
"rewards/rejected": -2.326221227645874, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.529777714925808, |
|
"kl": 0.0, |
|
"learning_rate": 4.031920460491889e-07, |
|
"logps/chosen": -260.2597961425781, |
|
"logps/rejected": -245.1582794189453, |
|
"loss": 0.2229, |
|
"rewards/chosen": -0.34088611602783203, |
|
"rewards/margins": 2.3002560138702393, |
|
"rewards/rejected": -2.6411421298980713, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.126319059662272, |
|
"kl": 0.0, |
|
"learning_rate": 4.029304029304029e-07, |
|
"logps/chosen": -159.91371154785156, |
|
"logps/rejected": -215.52037048339844, |
|
"loss": 0.1604, |
|
"rewards/chosen": -0.13704350590705872, |
|
"rewards/margins": 2.6966662406921387, |
|
"rewards/rejected": -2.833709716796875, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 12.2279596400823, |
|
"kl": 0.0, |
|
"learning_rate": 4.026687598116169e-07, |
|
"logps/chosen": -203.8690643310547, |
|
"logps/rejected": -248.30355834960938, |
|
"loss": 0.2764, |
|
"rewards/chosen": -1.4684327840805054, |
|
"rewards/margins": 0.25741446018218994, |
|
"rewards/rejected": -1.7258472442626953, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.93017592019336, |
|
"kl": 0.0, |
|
"learning_rate": 4.0240711669283095e-07, |
|
"logps/chosen": -312.0763244628906, |
|
"logps/rejected": -262.0274963378906, |
|
"loss": 0.1746, |
|
"rewards/chosen": -1.0104618072509766, |
|
"rewards/margins": 2.119619369506836, |
|
"rewards/rejected": -3.1300811767578125, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.026240675416432, |
|
"kl": 0.0, |
|
"learning_rate": 4.02145473574045e-07, |
|
"logps/chosen": -220.4754638671875, |
|
"logps/rejected": -265.4399719238281, |
|
"loss": 0.1718, |
|
"rewards/chosen": 0.43962326645851135, |
|
"rewards/margins": 3.062368392944336, |
|
"rewards/rejected": -2.6227450370788574, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 7.862119363862361, |
|
"kl": 0.0, |
|
"learning_rate": 4.0188383045525905e-07, |
|
"logps/chosen": -225.05450439453125, |
|
"logps/rejected": -323.68798828125, |
|
"loss": 0.2182, |
|
"rewards/chosen": -0.6643018126487732, |
|
"rewards/margins": 1.284275770187378, |
|
"rewards/rejected": -1.9485775232315063, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.113068546407243, |
|
"kl": 0.0, |
|
"learning_rate": 4.01622187336473e-07, |
|
"logps/chosen": -238.6414031982422, |
|
"logps/rejected": -226.67205810546875, |
|
"loss": 0.1479, |
|
"rewards/chosen": 0.5626780986785889, |
|
"rewards/margins": 3.329695224761963, |
|
"rewards/rejected": -2.767017126083374, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.742780985362685, |
|
"kl": 0.0, |
|
"learning_rate": 4.0136054421768705e-07, |
|
"logps/chosen": -205.3953857421875, |
|
"logps/rejected": -243.7183074951172, |
|
"loss": 0.1611, |
|
"rewards/chosen": 0.14700005948543549, |
|
"rewards/margins": 3.2417192459106445, |
|
"rewards/rejected": -3.094719171524048, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.28132011147294, |
|
"kl": 0.0, |
|
"learning_rate": 4.010989010989011e-07, |
|
"logps/chosen": -214.97244262695312, |
|
"logps/rejected": -216.71173095703125, |
|
"loss": 0.1858, |
|
"rewards/chosen": -0.015776043757796288, |
|
"rewards/margins": 2.4526312351226807, |
|
"rewards/rejected": -2.468407392501831, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 12.778638599311456, |
|
"kl": 0.0, |
|
"learning_rate": 4.008372579801151e-07, |
|
"logps/chosen": -281.7500915527344, |
|
"logps/rejected": -205.56297302246094, |
|
"loss": 0.1866, |
|
"rewards/chosen": -0.7977088093757629, |
|
"rewards/margins": 2.43152117729187, |
|
"rewards/rejected": -3.2292299270629883, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 13.038247255191017, |
|
"kl": 0.0, |
|
"learning_rate": 4.0057561486132915e-07, |
|
"logps/chosen": -278.4736328125, |
|
"logps/rejected": -237.22613525390625, |
|
"loss": 0.172, |
|
"rewards/chosen": 0.31080275774002075, |
|
"rewards/margins": 2.6367199420928955, |
|
"rewards/rejected": -2.3259172439575195, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 12.071589262477481, |
|
"kl": 0.0, |
|
"learning_rate": 4.0031397174254314e-07, |
|
"logps/chosen": -236.46629333496094, |
|
"logps/rejected": -230.3934326171875, |
|
"loss": 0.2205, |
|
"rewards/chosen": 0.46186843514442444, |
|
"rewards/margins": 2.98122239112854, |
|
"rewards/rejected": -2.5193538665771484, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.849099983442954, |
|
"kl": 0.0, |
|
"learning_rate": 4.0005232862375714e-07, |
|
"logps/chosen": -225.62376403808594, |
|
"logps/rejected": -240.21817016601562, |
|
"loss": 0.2767, |
|
"rewards/chosen": -0.6141564846038818, |
|
"rewards/margins": 0.870281457901001, |
|
"rewards/rejected": -1.4844379425048828, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 10.00253243924632, |
|
"kl": 0.0, |
|
"learning_rate": 3.997906855049712e-07, |
|
"logps/chosen": -253.96109008789062, |
|
"logps/rejected": -244.46368408203125, |
|
"loss": 0.1898, |
|
"rewards/chosen": -0.6670475006103516, |
|
"rewards/margins": 2.5981571674346924, |
|
"rewards/rejected": -3.265204668045044, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.423555936016577, |
|
"kl": 0.0, |
|
"learning_rate": 3.9952904238618524e-07, |
|
"logps/chosen": -235.05836486816406, |
|
"logps/rejected": -276.4933166503906, |
|
"loss": 0.1754, |
|
"rewards/chosen": -1.0856462717056274, |
|
"rewards/margins": 2.616638660430908, |
|
"rewards/rejected": -3.702285051345825, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.603542307090086, |
|
"kl": 0.0, |
|
"learning_rate": 3.992673992673993e-07, |
|
"logps/chosen": -218.27806091308594, |
|
"logps/rejected": -227.58969116210938, |
|
"loss": 0.1948, |
|
"rewards/chosen": 0.6791697144508362, |
|
"rewards/margins": 3.0643129348754883, |
|
"rewards/rejected": -2.385143280029297, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.997346527322566, |
|
"kl": 0.0, |
|
"learning_rate": 3.9900575614861324e-07, |
|
"logps/chosen": -205.48524475097656, |
|
"logps/rejected": -237.6011505126953, |
|
"loss": 0.2379, |
|
"rewards/chosen": 0.24487002193927765, |
|
"rewards/margins": 2.6219630241394043, |
|
"rewards/rejected": -2.3770930767059326, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.0666967002916, |
|
"kl": 0.0, |
|
"learning_rate": 3.987441130298273e-07, |
|
"logps/chosen": -196.8197479248047, |
|
"logps/rejected": -200.19735717773438, |
|
"loss": 0.1701, |
|
"rewards/chosen": 0.5998373627662659, |
|
"rewards/margins": 2.8779029846191406, |
|
"rewards/rejected": -2.2780656814575195, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.538513762529194, |
|
"kl": 0.0, |
|
"learning_rate": 3.9848246991104134e-07, |
|
"logps/chosen": -237.28199768066406, |
|
"logps/rejected": -266.018798828125, |
|
"loss": 0.167, |
|
"rewards/chosen": -0.623340904712677, |
|
"rewards/margins": 2.147143602371216, |
|
"rewards/rejected": -2.770484447479248, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.828887960825947, |
|
"kl": 0.0, |
|
"learning_rate": 3.9822082679225533e-07, |
|
"logps/chosen": -220.56329345703125, |
|
"logps/rejected": -268.723388671875, |
|
"loss": 0.1492, |
|
"rewards/chosen": 0.7558012008666992, |
|
"rewards/margins": 2.6395673751831055, |
|
"rewards/rejected": -1.8837661743164062, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 11.47492309378771, |
|
"kl": 0.0, |
|
"learning_rate": 3.979591836734694e-07, |
|
"logps/chosen": -249.0001983642578, |
|
"logps/rejected": -240.189697265625, |
|
"loss": 0.18, |
|
"rewards/chosen": 0.21565745770931244, |
|
"rewards/margins": 2.2755565643310547, |
|
"rewards/rejected": -2.059899091720581, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.926444305093995, |
|
"kl": 0.0, |
|
"learning_rate": 3.9769754055468343e-07, |
|
"logps/chosen": -200.0030059814453, |
|
"logps/rejected": -206.71624755859375, |
|
"loss": 0.1516, |
|
"rewards/chosen": -0.6393547058105469, |
|
"rewards/margins": 1.0063797235488892, |
|
"rewards/rejected": -1.645734429359436, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 11.660568501998092, |
|
"kl": 0.0, |
|
"learning_rate": 3.974358974358974e-07, |
|
"logps/chosen": -223.32652282714844, |
|
"logps/rejected": -243.16845703125, |
|
"loss": 0.2899, |
|
"rewards/chosen": 0.2290610671043396, |
|
"rewards/margins": 1.6543824672698975, |
|
"rewards/rejected": -1.425321340560913, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 14.42295762896043, |
|
"kl": 0.0, |
|
"learning_rate": 3.9717425431711143e-07, |
|
"logps/chosen": -289.037353515625, |
|
"logps/rejected": -252.30154418945312, |
|
"loss": 0.2241, |
|
"rewards/chosen": -1.7561075687408447, |
|
"rewards/margins": 0.43781447410583496, |
|
"rewards/rejected": -2.1939220428466797, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 11.559459650147774, |
|
"kl": 0.0, |
|
"learning_rate": 3.969126111983255e-07, |
|
"logps/chosen": -245.2276153564453, |
|
"logps/rejected": -241.8197021484375, |
|
"loss": 0.2797, |
|
"rewards/chosen": -1.3280991315841675, |
|
"rewards/margins": 1.9113882780075073, |
|
"rewards/rejected": -3.239487409591675, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 14.711575319555203, |
|
"kl": 0.0, |
|
"learning_rate": 3.9665096807953953e-07, |
|
"logps/chosen": -183.3538818359375, |
|
"logps/rejected": -245.3995819091797, |
|
"loss": 0.2133, |
|
"rewards/chosen": -0.0880654826760292, |
|
"rewards/margins": 2.2100796699523926, |
|
"rewards/rejected": -2.298145055770874, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 13.58824047165676, |
|
"kl": 0.0, |
|
"learning_rate": 3.9638932496075353e-07, |
|
"logps/chosen": -319.79083251953125, |
|
"logps/rejected": -241.92750549316406, |
|
"loss": 0.2077, |
|
"rewards/chosen": -0.5814463496208191, |
|
"rewards/margins": 1.7764933109283447, |
|
"rewards/rejected": -2.3579397201538086, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.51670285219746, |
|
"kl": 0.0, |
|
"learning_rate": 3.961276818419675e-07, |
|
"logps/chosen": -187.76580810546875, |
|
"logps/rejected": -264.5431823730469, |
|
"loss": 0.2047, |
|
"rewards/chosen": 0.11567319184541702, |
|
"rewards/margins": 4.132474899291992, |
|
"rewards/rejected": -4.016801834106445, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.431439920064484, |
|
"kl": 0.0, |
|
"learning_rate": 3.958660387231816e-07, |
|
"logps/chosen": -152.51498413085938, |
|
"logps/rejected": -235.09324645996094, |
|
"loss": 0.2288, |
|
"rewards/chosen": 0.2288106083869934, |
|
"rewards/margins": 2.305171251296997, |
|
"rewards/rejected": -2.0763607025146484, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 11.437728669932744, |
|
"kl": 0.0, |
|
"learning_rate": 3.9560439560439557e-07, |
|
"logps/chosen": -198.04653930664062, |
|
"logps/rejected": -216.70425415039062, |
|
"loss": 0.148, |
|
"rewards/chosen": 0.6912394165992737, |
|
"rewards/margins": 2.1111791133880615, |
|
"rewards/rejected": -1.4199397563934326, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.47479277819163, |
|
"kl": 0.0, |
|
"learning_rate": 3.953427524856096e-07, |
|
"logps/chosen": -218.10205078125, |
|
"logps/rejected": -294.8763427734375, |
|
"loss": 0.1738, |
|
"rewards/chosen": 0.3303108215332031, |
|
"rewards/margins": 2.3888604640960693, |
|
"rewards/rejected": -2.058549642562866, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.343750828726911, |
|
"kl": 0.0, |
|
"learning_rate": 3.9508110936682367e-07, |
|
"logps/chosen": -216.23837280273438, |
|
"logps/rejected": -222.16204833984375, |
|
"loss": 0.1711, |
|
"rewards/chosen": 0.24862945079803467, |
|
"rewards/margins": 2.214453935623169, |
|
"rewards/rejected": -1.9658244848251343, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 11.335701218532215, |
|
"kl": 0.0, |
|
"learning_rate": 3.948194662480376e-07, |
|
"logps/chosen": -236.11170959472656, |
|
"logps/rejected": -206.62315368652344, |
|
"loss": 0.2024, |
|
"rewards/chosen": -0.06115901097655296, |
|
"rewards/margins": 1.5972472429275513, |
|
"rewards/rejected": -1.6584062576293945, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.475003237928307, |
|
"kl": 0.0, |
|
"learning_rate": 3.9455782312925167e-07, |
|
"logps/chosen": -260.5793151855469, |
|
"logps/rejected": -287.6771545410156, |
|
"loss": 0.1706, |
|
"rewards/chosen": -0.39119893312454224, |
|
"rewards/margins": 1.3929164409637451, |
|
"rewards/rejected": -1.7841154336929321, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.477036698967389, |
|
"kl": 0.0, |
|
"learning_rate": 3.942961800104657e-07, |
|
"logps/chosen": -175.79698181152344, |
|
"logps/rejected": -189.22921752929688, |
|
"loss": 0.1915, |
|
"rewards/chosen": 0.30524417757987976, |
|
"rewards/margins": 2.519529104232788, |
|
"rewards/rejected": -2.214284896850586, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.239825540829113, |
|
"kl": 0.0, |
|
"learning_rate": 3.9403453689167977e-07, |
|
"logps/chosen": -271.8373718261719, |
|
"logps/rejected": -304.6640319824219, |
|
"loss": 0.1507, |
|
"rewards/chosen": -0.007900595664978027, |
|
"rewards/margins": 3.236818313598633, |
|
"rewards/rejected": -3.2447190284729004, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.427157164376496, |
|
"kl": 0.0, |
|
"learning_rate": 3.9377289377289377e-07, |
|
"logps/chosen": -215.03746032714844, |
|
"logps/rejected": -255.59893798828125, |
|
"loss": 0.2518, |
|
"rewards/chosen": -0.6349587440490723, |
|
"rewards/margins": 2.439880847930908, |
|
"rewards/rejected": -3.0748395919799805, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 11.207119574932996, |
|
"kl": 0.0, |
|
"learning_rate": 3.9351125065410776e-07, |
|
"logps/chosen": -167.9252471923828, |
|
"logps/rejected": -230.44461059570312, |
|
"loss": 0.2153, |
|
"rewards/chosen": -0.5827754139900208, |
|
"rewards/margins": 1.467712640762329, |
|
"rewards/rejected": -2.050487995147705, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.843002882536329, |
|
"kl": 0.0, |
|
"learning_rate": 3.932496075353218e-07, |
|
"logps/chosen": -274.9442443847656, |
|
"logps/rejected": -291.43450927734375, |
|
"loss": 0.1899, |
|
"rewards/chosen": -0.4634897708892822, |
|
"rewards/margins": 2.9832985401153564, |
|
"rewards/rejected": -3.4467883110046387, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.301578880284277, |
|
"kl": 0.0, |
|
"learning_rate": 3.929879644165358e-07, |
|
"logps/chosen": -230.11001586914062, |
|
"logps/rejected": -262.1216125488281, |
|
"loss": 0.2163, |
|
"rewards/chosen": -0.47498393058776855, |
|
"rewards/margins": 2.5229318141937256, |
|
"rewards/rejected": -2.997915744781494, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 14.17354036141092, |
|
"kl": 0.0, |
|
"learning_rate": 3.9272632129774986e-07, |
|
"logps/chosen": -236.7783203125, |
|
"logps/rejected": -225.37086486816406, |
|
"loss": 0.1465, |
|
"rewards/chosen": 0.534197211265564, |
|
"rewards/margins": 3.3101611137390137, |
|
"rewards/rejected": -2.7759640216827393, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 9.994681734611301, |
|
"kl": 0.0, |
|
"learning_rate": 3.924646781789639e-07, |
|
"logps/chosen": -152.39027404785156, |
|
"logps/rejected": -236.91995239257812, |
|
"loss": 0.1985, |
|
"rewards/chosen": -0.45504745841026306, |
|
"rewards/margins": 2.5142714977264404, |
|
"rewards/rejected": -2.9693188667297363, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 9.62216210519223, |
|
"kl": 0.0, |
|
"learning_rate": 3.9220303506017786e-07, |
|
"logps/chosen": -222.86874389648438, |
|
"logps/rejected": -318.4061279296875, |
|
"loss": 0.1912, |
|
"rewards/chosen": -0.2937797009944916, |
|
"rewards/margins": 3.895700216293335, |
|
"rewards/rejected": -4.189479827880859, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.901795525608296, |
|
"kl": 0.0, |
|
"learning_rate": 3.919413919413919e-07, |
|
"logps/chosen": -214.8961639404297, |
|
"logps/rejected": -260.2914733886719, |
|
"loss": 0.2245, |
|
"rewards/chosen": 0.16179826855659485, |
|
"rewards/margins": 2.0142719745635986, |
|
"rewards/rejected": -1.8524737358093262, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 13.652487549232319, |
|
"kl": 0.0, |
|
"learning_rate": 3.9167974882260596e-07, |
|
"logps/chosen": -212.1907958984375, |
|
"logps/rejected": -278.97296142578125, |
|
"loss": 0.1698, |
|
"rewards/chosen": -0.08914380520582199, |
|
"rewards/margins": 3.5527141094207764, |
|
"rewards/rejected": -3.641857862472534, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.750735909217193, |
|
"kl": 0.0, |
|
"learning_rate": 3.9141810570381995e-07, |
|
"logps/chosen": -219.64820861816406, |
|
"logps/rejected": -223.59970092773438, |
|
"loss": 0.1419, |
|
"rewards/chosen": -0.31567585468292236, |
|
"rewards/margins": 1.8360644578933716, |
|
"rewards/rejected": -2.151740312576294, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 13.00956558924868, |
|
"kl": 0.0, |
|
"learning_rate": 3.91156462585034e-07, |
|
"logps/chosen": -221.29444885253906, |
|
"logps/rejected": -292.55242919921875, |
|
"loss": 0.2404, |
|
"rewards/chosen": -0.8755928874015808, |
|
"rewards/margins": 2.879188060760498, |
|
"rewards/rejected": -3.7547810077667236, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 11.263246462543782, |
|
"kl": 0.0, |
|
"learning_rate": 3.90894819466248e-07, |
|
"logps/chosen": -204.36553955078125, |
|
"logps/rejected": -277.9330749511719, |
|
"loss": 0.2751, |
|
"rewards/chosen": -0.16093337535858154, |
|
"rewards/margins": 1.7333406209945679, |
|
"rewards/rejected": -1.8942739963531494, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.77749694683554, |
|
"kl": 0.0, |
|
"learning_rate": 3.9063317634746205e-07, |
|
"logps/chosen": -218.55001831054688, |
|
"logps/rejected": -276.6634216308594, |
|
"loss": 0.1298, |
|
"rewards/chosen": 0.9015990495681763, |
|
"rewards/margins": 3.651735305786133, |
|
"rewards/rejected": -2.750136137008667, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.516288450325918, |
|
"kl": 0.0, |
|
"learning_rate": 3.9037153322867605e-07, |
|
"logps/chosen": -210.36167907714844, |
|
"logps/rejected": -265.4808654785156, |
|
"loss": 0.2017, |
|
"rewards/chosen": -0.17863792181015015, |
|
"rewards/margins": 2.6375937461853027, |
|
"rewards/rejected": -2.8162317276000977, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 11.824811555332477, |
|
"kl": 0.0, |
|
"learning_rate": 3.901098901098901e-07, |
|
"logps/chosen": -208.4647674560547, |
|
"logps/rejected": -255.76190185546875, |
|
"loss": 0.199, |
|
"rewards/chosen": 0.15355457365512848, |
|
"rewards/margins": 2.69331955909729, |
|
"rewards/rejected": -2.539764881134033, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.69862877229561, |
|
"kl": 0.0, |
|
"learning_rate": 3.8984824699110415e-07, |
|
"logps/chosen": -256.15496826171875, |
|
"logps/rejected": -272.2501220703125, |
|
"loss": 0.1392, |
|
"rewards/chosen": 0.36112552881240845, |
|
"rewards/margins": 3.3303704261779785, |
|
"rewards/rejected": -2.969244956970215, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.317432492795456, |
|
"kl": 0.0, |
|
"learning_rate": 3.895866038723181e-07, |
|
"logps/chosen": -201.2956085205078, |
|
"logps/rejected": -252.26727294921875, |
|
"loss": 0.2015, |
|
"rewards/chosen": -0.19176657497882843, |
|
"rewards/margins": 1.5057237148284912, |
|
"rewards/rejected": -1.6974903345108032, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 11.089829796355776, |
|
"kl": 0.0, |
|
"learning_rate": 3.8932496075353214e-07, |
|
"logps/chosen": -251.06739807128906, |
|
"logps/rejected": -207.30560302734375, |
|
"loss": 0.2024, |
|
"rewards/chosen": 0.43589267134666443, |
|
"rewards/margins": 2.9610509872436523, |
|
"rewards/rejected": -2.525158405303955, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 13.046985449887648, |
|
"kl": 0.0, |
|
"learning_rate": 3.890633176347462e-07, |
|
"logps/chosen": -191.86936950683594, |
|
"logps/rejected": -282.1868896484375, |
|
"loss": 0.172, |
|
"rewards/chosen": -0.3784329891204834, |
|
"rewards/margins": 2.067564010620117, |
|
"rewards/rejected": -2.4459969997406006, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 9.765346020761122, |
|
"kl": 0.0, |
|
"learning_rate": 3.888016745159602e-07, |
|
"logps/chosen": -199.694580078125, |
|
"logps/rejected": -256.0482482910156, |
|
"loss": 0.1586, |
|
"rewards/chosen": -0.6785757541656494, |
|
"rewards/margins": 1.9910736083984375, |
|
"rewards/rejected": -2.669649362564087, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 14.196677493751132, |
|
"kl": 0.0, |
|
"learning_rate": 3.8854003139717424e-07, |
|
"logps/chosen": -228.76034545898438, |
|
"logps/rejected": -302.63836669921875, |
|
"loss": 0.1734, |
|
"rewards/chosen": -0.3336869776248932, |
|
"rewards/margins": 2.1534783840179443, |
|
"rewards/rejected": -2.4871654510498047, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 11.772671674738493, |
|
"kl": 0.0, |
|
"learning_rate": 3.882783882783883e-07, |
|
"logps/chosen": -234.97866821289062, |
|
"logps/rejected": -277.7060241699219, |
|
"loss": 0.2091, |
|
"rewards/chosen": -0.5110215544700623, |
|
"rewards/margins": 0.7004496455192566, |
|
"rewards/rejected": -1.2114711999893188, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 13.524888701607797, |
|
"kl": 0.0, |
|
"learning_rate": 3.880167451596023e-07, |
|
"logps/chosen": -222.4050750732422, |
|
"logps/rejected": -227.77481079101562, |
|
"loss": 0.2568, |
|
"rewards/chosen": 0.0008676449651829898, |
|
"rewards/margins": 1.8511059284210205, |
|
"rewards/rejected": -1.85023832321167, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.405779261311427, |
|
"kl": 0.0, |
|
"learning_rate": 3.877551020408163e-07, |
|
"logps/chosen": -204.4320068359375, |
|
"logps/rejected": -222.79629516601562, |
|
"loss": 0.0875, |
|
"rewards/chosen": 0.08031473308801651, |
|
"rewards/margins": 2.481466770172119, |
|
"rewards/rejected": -2.4011521339416504, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 14.72924638814786, |
|
"kl": 0.0, |
|
"learning_rate": 3.8749345892203034e-07, |
|
"logps/chosen": -289.6814270019531, |
|
"logps/rejected": -331.42816162109375, |
|
"loss": 0.1481, |
|
"rewards/chosen": -0.13640549778938293, |
|
"rewards/margins": 4.516214370727539, |
|
"rewards/rejected": -4.6526198387146, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.741301498332556, |
|
"kl": 0.0, |
|
"learning_rate": 3.872318158032444e-07, |
|
"logps/chosen": -222.26815795898438, |
|
"logps/rejected": -271.1949462890625, |
|
"loss": 0.197, |
|
"rewards/chosen": 0.39989006519317627, |
|
"rewards/margins": 2.3632915019989014, |
|
"rewards/rejected": -1.963401436805725, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 12.112546116412723, |
|
"kl": 0.0, |
|
"learning_rate": 3.869701726844584e-07, |
|
"logps/chosen": -243.7536163330078, |
|
"logps/rejected": -252.95794677734375, |
|
"loss": 0.2012, |
|
"rewards/chosen": -0.38057583570480347, |
|
"rewards/margins": 1.647994041442871, |
|
"rewards/rejected": -2.0285699367523193, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 14.742669713065881, |
|
"kl": 0.0, |
|
"learning_rate": 3.867085295656724e-07, |
|
"logps/chosen": -244.42164611816406, |
|
"logps/rejected": -267.96954345703125, |
|
"loss": 0.2001, |
|
"rewards/chosen": 0.29253336787223816, |
|
"rewards/margins": 2.2655460834503174, |
|
"rewards/rejected": -1.9730128049850464, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.134298526921, |
|
"kl": 0.0, |
|
"learning_rate": 3.8644688644688643e-07, |
|
"logps/chosen": -193.90284729003906, |
|
"logps/rejected": -232.6556396484375, |
|
"loss": 0.2008, |
|
"rewards/chosen": 0.34420499205589294, |
|
"rewards/margins": 3.2133047580718994, |
|
"rewards/rejected": -2.8690998554229736, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.746178628630169, |
|
"kl": 0.0, |
|
"learning_rate": 3.8618524332810043e-07, |
|
"logps/chosen": -220.666015625, |
|
"logps/rejected": -303.94451904296875, |
|
"loss": 0.1645, |
|
"rewards/chosen": 0.019716663286089897, |
|
"rewards/margins": 2.469646692276001, |
|
"rewards/rejected": -2.44992995262146, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.618541405718464, |
|
"kl": 0.0, |
|
"learning_rate": 3.859236002093145e-07, |
|
"logps/chosen": -201.02305603027344, |
|
"logps/rejected": -307.0491027832031, |
|
"loss": 0.1626, |
|
"rewards/chosen": -0.7490205764770508, |
|
"rewards/margins": 2.6788406372070312, |
|
"rewards/rejected": -3.427861213684082, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 12.168910387907376, |
|
"kl": 0.0, |
|
"learning_rate": 3.8566195709052853e-07, |
|
"logps/chosen": -278.5819396972656, |
|
"logps/rejected": -261.8109130859375, |
|
"loss": 0.0773, |
|
"rewards/chosen": -0.5979146361351013, |
|
"rewards/margins": 2.49701189994812, |
|
"rewards/rejected": -3.094926595687866, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.604103054919578, |
|
"kl": 0.0, |
|
"learning_rate": 3.8540031397174253e-07, |
|
"logps/chosen": -193.6268768310547, |
|
"logps/rejected": -241.7948455810547, |
|
"loss": 0.1785, |
|
"rewards/chosen": 0.43937230110168457, |
|
"rewards/margins": 1.834383487701416, |
|
"rewards/rejected": -1.3950111865997314, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.834100142745722, |
|
"kl": 0.0, |
|
"learning_rate": 3.851386708529565e-07, |
|
"logps/chosen": -229.8187713623047, |
|
"logps/rejected": -325.75762939453125, |
|
"loss": 0.1844, |
|
"rewards/chosen": 0.6806701421737671, |
|
"rewards/margins": 4.323229789733887, |
|
"rewards/rejected": -3.64255952835083, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 12.704909503144034, |
|
"kl": 0.0, |
|
"learning_rate": 3.848770277341706e-07, |
|
"logps/chosen": -224.91845703125, |
|
"logps/rejected": -340.0356750488281, |
|
"loss": 0.2631, |
|
"rewards/chosen": 0.026007119566202164, |
|
"rewards/margins": 2.283827781677246, |
|
"rewards/rejected": -2.2578206062316895, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.41021619314634, |
|
"kl": 0.0, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logps/chosen": -219.94329833984375, |
|
"logps/rejected": -233.54290771484375, |
|
"loss": 0.1716, |
|
"rewards/chosen": -0.22441209852695465, |
|
"rewards/margins": 2.2788405418395996, |
|
"rewards/rejected": -2.5032527446746826, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.379317080534767, |
|
"kl": 0.0, |
|
"learning_rate": 3.843537414965986e-07, |
|
"logps/chosen": -229.6413116455078, |
|
"logps/rejected": -249.2711639404297, |
|
"loss": 0.1614, |
|
"rewards/chosen": 0.3848222494125366, |
|
"rewards/margins": 1.9725074768066406, |
|
"rewards/rejected": -1.587685227394104, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.520229093007123, |
|
"kl": 0.0, |
|
"learning_rate": 3.840920983778126e-07, |
|
"logps/chosen": -222.9331817626953, |
|
"logps/rejected": -208.89297485351562, |
|
"loss": 0.1427, |
|
"rewards/chosen": 0.582370400428772, |
|
"rewards/margins": 3.301401138305664, |
|
"rewards/rejected": -2.7190306186676025, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 13.275687424817486, |
|
"kl": 0.0, |
|
"learning_rate": 3.8383045525902667e-07, |
|
"logps/chosen": -189.17926025390625, |
|
"logps/rejected": -219.13580322265625, |
|
"loss": 0.2513, |
|
"rewards/chosen": -0.015819329768419266, |
|
"rewards/margins": 1.8377530574798584, |
|
"rewards/rejected": -1.8535723686218262, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.600324116674757, |
|
"kl": 0.0, |
|
"learning_rate": 3.8356881214024067e-07, |
|
"logps/chosen": -216.97711181640625, |
|
"logps/rejected": -249.7940216064453, |
|
"loss": 0.2049, |
|
"rewards/chosen": 0.5193243622779846, |
|
"rewards/margins": 2.258141040802002, |
|
"rewards/rejected": -1.738816738128662, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.525139117282963, |
|
"kl": 0.0, |
|
"learning_rate": 3.833071690214547e-07, |
|
"logps/chosen": -222.4760284423828, |
|
"logps/rejected": -239.29403686523438, |
|
"loss": 0.071, |
|
"rewards/chosen": 0.5078960657119751, |
|
"rewards/margins": 2.6218113899230957, |
|
"rewards/rejected": -2.113915205001831, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 13.222155048640982, |
|
"kl": 0.0, |
|
"learning_rate": 3.8304552590266877e-07, |
|
"logps/chosen": -200.71603393554688, |
|
"logps/rejected": -267.3768005371094, |
|
"loss": 0.162, |
|
"rewards/chosen": 0.24389874935150146, |
|
"rewards/margins": 3.596365451812744, |
|
"rewards/rejected": -3.3524668216705322, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 10.120823838278643, |
|
"kl": 0.0, |
|
"learning_rate": 3.827838827838827e-07, |
|
"logps/chosen": -196.2765350341797, |
|
"logps/rejected": -215.92112731933594, |
|
"loss": 0.1333, |
|
"rewards/chosen": 0.05430547147989273, |
|
"rewards/margins": 1.7621450424194336, |
|
"rewards/rejected": -1.7078396081924438, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.458337974039024, |
|
"kl": 0.0, |
|
"learning_rate": 3.8252223966509676e-07, |
|
"logps/chosen": -195.0396270751953, |
|
"logps/rejected": -285.74462890625, |
|
"loss": 0.1939, |
|
"rewards/chosen": 0.07916612178087234, |
|
"rewards/margins": 2.918091058731079, |
|
"rewards/rejected": -2.8389248847961426, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 12.160182938253959, |
|
"kl": 0.0, |
|
"learning_rate": 3.822605965463108e-07, |
|
"logps/chosen": -193.6781768798828, |
|
"logps/rejected": -243.92469787597656, |
|
"loss": 0.1757, |
|
"rewards/chosen": -0.5648252964019775, |
|
"rewards/margins": 1.6671717166900635, |
|
"rewards/rejected": -2.231997013092041, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 12.143861035511275, |
|
"kl": 0.0, |
|
"learning_rate": 3.8199895342752486e-07, |
|
"logps/chosen": -208.96681213378906, |
|
"logps/rejected": -243.71438598632812, |
|
"loss": 0.1589, |
|
"rewards/chosen": -0.6121307015419006, |
|
"rewards/margins": 1.7129931449890137, |
|
"rewards/rejected": -2.3251237869262695, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 9.91525808504644, |
|
"kl": 0.0, |
|
"learning_rate": 3.8173731030873886e-07, |
|
"logps/chosen": -266.3404541015625, |
|
"logps/rejected": -302.9526672363281, |
|
"loss": 0.1843, |
|
"rewards/chosen": -0.14618553221225739, |
|
"rewards/margins": 2.309356689453125, |
|
"rewards/rejected": -2.4555423259735107, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 9.702336587181945, |
|
"kl": 0.0, |
|
"learning_rate": 3.814756671899529e-07, |
|
"logps/chosen": -181.5204620361328, |
|
"logps/rejected": -304.9017639160156, |
|
"loss": 0.1483, |
|
"rewards/chosen": -0.1943751722574234, |
|
"rewards/margins": 3.55947208404541, |
|
"rewards/rejected": -3.753847360610962, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 11.965604322215004, |
|
"kl": 0.0, |
|
"learning_rate": 3.812140240711669e-07, |
|
"logps/chosen": -245.6624755859375, |
|
"logps/rejected": -260.020263671875, |
|
"loss": 0.2183, |
|
"rewards/chosen": -1.2427337169647217, |
|
"rewards/margins": 1.6019926071166992, |
|
"rewards/rejected": -2.844726324081421, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.836340963305517, |
|
"kl": 0.0, |
|
"learning_rate": 3.809523809523809e-07, |
|
"logps/chosen": -237.24041748046875, |
|
"logps/rejected": -235.87216186523438, |
|
"loss": 0.1095, |
|
"rewards/chosen": 0.8246784210205078, |
|
"rewards/margins": 3.6156461238861084, |
|
"rewards/rejected": -2.7909677028656006, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 11.897690499315637, |
|
"kl": 0.0, |
|
"learning_rate": 3.8069073783359496e-07, |
|
"logps/chosen": -232.65216064453125, |
|
"logps/rejected": -197.15951538085938, |
|
"loss": 0.0782, |
|
"rewards/chosen": 0.05020507052540779, |
|
"rewards/margins": 3.441476821899414, |
|
"rewards/rejected": -3.3912718296051025, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.049103139267938, |
|
"kl": 0.0, |
|
"learning_rate": 3.80429094714809e-07, |
|
"logps/chosen": -239.66998291015625, |
|
"logps/rejected": -256.76837158203125, |
|
"loss": 0.1933, |
|
"rewards/chosen": 0.054171182215213776, |
|
"rewards/margins": 3.8353452682495117, |
|
"rewards/rejected": -3.7811741828918457, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 13.088097317739122, |
|
"kl": 0.0, |
|
"learning_rate": 3.80167451596023e-07, |
|
"logps/chosen": -205.0490264892578, |
|
"logps/rejected": -241.65966796875, |
|
"loss": 0.1946, |
|
"rewards/chosen": -0.03932800143957138, |
|
"rewards/margins": 2.7577974796295166, |
|
"rewards/rejected": -2.7971255779266357, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 13.14569665419969, |
|
"kl": 0.0, |
|
"learning_rate": 3.79905808477237e-07, |
|
"logps/chosen": -298.6637268066406, |
|
"logps/rejected": -285.0900573730469, |
|
"loss": 0.1677, |
|
"rewards/chosen": -0.5165401697158813, |
|
"rewards/margins": 3.1756558418273926, |
|
"rewards/rejected": -3.6921958923339844, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.434555514022984, |
|
"kl": 0.0, |
|
"learning_rate": 3.7964416535845105e-07, |
|
"logps/chosen": -228.3349609375, |
|
"logps/rejected": -214.828857421875, |
|
"loss": 0.1609, |
|
"rewards/chosen": 0.11236274987459183, |
|
"rewards/margins": 2.25835919380188, |
|
"rewards/rejected": -2.145996332168579, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.167277710211138, |
|
"kl": 0.0, |
|
"learning_rate": 3.793825222396651e-07, |
|
"logps/chosen": -245.6664581298828, |
|
"logps/rejected": -274.6752624511719, |
|
"loss": 0.1496, |
|
"rewards/chosen": -0.27803435921669006, |
|
"rewards/margins": 1.9288045167922974, |
|
"rewards/rejected": -2.206838846206665, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.474016211401475, |
|
"kl": 0.0, |
|
"learning_rate": 3.791208791208791e-07, |
|
"logps/chosen": -193.68919372558594, |
|
"logps/rejected": -202.5056610107422, |
|
"loss": 0.1846, |
|
"rewards/chosen": -0.4363885223865509, |
|
"rewards/margins": 2.7338576316833496, |
|
"rewards/rejected": -3.170246124267578, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.659154195676038, |
|
"kl": 0.0, |
|
"learning_rate": 3.7885923600209315e-07, |
|
"logps/chosen": -242.42816162109375, |
|
"logps/rejected": -282.0824890136719, |
|
"loss": 0.1603, |
|
"rewards/chosen": 0.673112690448761, |
|
"rewards/margins": 3.3613035678863525, |
|
"rewards/rejected": -2.6881909370422363, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 12.65492990351007, |
|
"kl": 0.0, |
|
"learning_rate": 3.7859759288330715e-07, |
|
"logps/chosen": -170.49734497070312, |
|
"logps/rejected": -210.32472229003906, |
|
"loss": 0.1996, |
|
"rewards/chosen": 0.2279433161020279, |
|
"rewards/margins": 2.666241407394409, |
|
"rewards/rejected": -2.438297986984253, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.54524759203527, |
|
"kl": 0.0, |
|
"learning_rate": 3.7833594976452115e-07, |
|
"logps/chosen": -153.5712432861328, |
|
"logps/rejected": -269.2149353027344, |
|
"loss": 0.2098, |
|
"rewards/chosen": -0.019966602325439453, |
|
"rewards/margins": 2.5769615173339844, |
|
"rewards/rejected": -2.596928119659424, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 10.51825030268249, |
|
"kl": 0.0, |
|
"learning_rate": 3.780743066457352e-07, |
|
"logps/chosen": -203.99156188964844, |
|
"logps/rejected": -266.8109436035156, |
|
"loss": 0.1801, |
|
"rewards/chosen": 1.9818308353424072, |
|
"rewards/margins": 3.033543109893799, |
|
"rewards/rejected": -1.051712155342102, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 9.77436331647093, |
|
"kl": 0.0, |
|
"learning_rate": 3.7781266352694925e-07, |
|
"logps/chosen": -240.96786499023438, |
|
"logps/rejected": -226.21963500976562, |
|
"loss": 0.146, |
|
"rewards/chosen": -0.04570434242486954, |
|
"rewards/margins": 1.6731890439987183, |
|
"rewards/rejected": -1.7188934087753296, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 11.33142669534873, |
|
"kl": 0.0, |
|
"learning_rate": 3.7755102040816324e-07, |
|
"logps/chosen": -274.2488708496094, |
|
"logps/rejected": -221.2158203125, |
|
"loss": 0.2061, |
|
"rewards/chosen": -0.03258565813302994, |
|
"rewards/margins": 1.8558272123336792, |
|
"rewards/rejected": -1.8884128332138062, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 12.417447396754456, |
|
"kl": 0.0, |
|
"learning_rate": 3.7728937728937724e-07, |
|
"logps/chosen": -241.2676544189453, |
|
"logps/rejected": -312.99432373046875, |
|
"loss": 0.1357, |
|
"rewards/chosen": 1.6663408279418945, |
|
"rewards/margins": 3.927600383758545, |
|
"rewards/rejected": -2.2612595558166504, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.170512277856618, |
|
"kl": 0.0, |
|
"learning_rate": 3.770277341705913e-07, |
|
"logps/chosen": -233.8650360107422, |
|
"logps/rejected": -197.24072265625, |
|
"loss": 0.1556, |
|
"rewards/chosen": 0.8342301249504089, |
|
"rewards/margins": 2.448633909225464, |
|
"rewards/rejected": -1.6144037246704102, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 13.803253821404509, |
|
"kl": 0.0, |
|
"learning_rate": 3.7676609105180534e-07, |
|
"logps/chosen": -194.3175506591797, |
|
"logps/rejected": -353.5451354980469, |
|
"loss": 0.1461, |
|
"rewards/chosen": 0.6489688754081726, |
|
"rewards/margins": 3.0008530616760254, |
|
"rewards/rejected": -2.351884126663208, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.510104873936742, |
|
"kl": 0.0, |
|
"learning_rate": 3.7650444793301934e-07, |
|
"logps/chosen": -276.4261474609375, |
|
"logps/rejected": -173.75657653808594, |
|
"loss": 0.2384, |
|
"rewards/chosen": -0.2080867886543274, |
|
"rewards/margins": 0.7136066555976868, |
|
"rewards/rejected": -0.9216934442520142, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.151674395455558, |
|
"kl": 0.0, |
|
"learning_rate": 3.762428048142334e-07, |
|
"logps/chosen": -234.051025390625, |
|
"logps/rejected": -349.57586669921875, |
|
"loss": 0.1555, |
|
"rewards/chosen": 0.1754276305437088, |
|
"rewards/margins": 2.250746965408325, |
|
"rewards/rejected": -2.075319290161133, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.012473838900807, |
|
"kl": 0.0, |
|
"learning_rate": 3.759811616954474e-07, |
|
"logps/chosen": -206.8048553466797, |
|
"logps/rejected": -245.5289306640625, |
|
"loss": 0.1644, |
|
"rewards/chosen": 0.046800464391708374, |
|
"rewards/margins": 2.0636954307556152, |
|
"rewards/rejected": -2.016895055770874, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 12.41419451120897, |
|
"kl": 0.0, |
|
"learning_rate": 3.757195185766614e-07, |
|
"logps/chosen": -197.76080322265625, |
|
"logps/rejected": -243.91220092773438, |
|
"loss": 0.1441, |
|
"rewards/chosen": 0.16484998166561127, |
|
"rewards/margins": 1.870893120765686, |
|
"rewards/rejected": -1.7060431241989136, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 12.281595873200606, |
|
"kl": 0.0, |
|
"learning_rate": 3.7545787545787543e-07, |
|
"logps/chosen": -206.76541137695312, |
|
"logps/rejected": -230.58517456054688, |
|
"loss": 0.1835, |
|
"rewards/chosen": 0.500891923904419, |
|
"rewards/margins": 2.8288941383361816, |
|
"rewards/rejected": -2.3280022144317627, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.477594673948579, |
|
"kl": 0.0, |
|
"learning_rate": 3.751962323390895e-07, |
|
"logps/chosen": -205.23182678222656, |
|
"logps/rejected": -265.99298095703125, |
|
"loss": 0.2508, |
|
"rewards/chosen": -0.7120283842086792, |
|
"rewards/margins": 1.5744119882583618, |
|
"rewards/rejected": -2.286440372467041, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.389487469578903, |
|
"kl": 0.0, |
|
"learning_rate": 3.749345892203035e-07, |
|
"logps/chosen": -239.08309936523438, |
|
"logps/rejected": -245.90695190429688, |
|
"loss": 0.2403, |
|
"rewards/chosen": 0.03259509429335594, |
|
"rewards/margins": 1.3131746053695679, |
|
"rewards/rejected": -1.2805795669555664, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 12.417872067981774, |
|
"kl": 0.0, |
|
"learning_rate": 3.7467294610151753e-07, |
|
"logps/chosen": -256.7392883300781, |
|
"logps/rejected": -276.9643249511719, |
|
"loss": 0.2144, |
|
"rewards/chosen": 0.17145968973636627, |
|
"rewards/margins": 2.2896573543548584, |
|
"rewards/rejected": -2.1181976795196533, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.327729837593566, |
|
"kl": 0.0, |
|
"learning_rate": 3.7441130298273153e-07, |
|
"logps/chosen": -194.67178344726562, |
|
"logps/rejected": -293.6398620605469, |
|
"loss": 0.2087, |
|
"rewards/chosen": 0.07769111543893814, |
|
"rewards/margins": 2.251216411590576, |
|
"rewards/rejected": -2.173525333404541, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 11.641851328355145, |
|
"kl": 0.0, |
|
"learning_rate": 3.741496598639456e-07, |
|
"logps/chosen": -225.63128662109375, |
|
"logps/rejected": -242.99880981445312, |
|
"loss": 0.2983, |
|
"rewards/chosen": -0.29468005895614624, |
|
"rewards/margins": 1.5418860912322998, |
|
"rewards/rejected": -1.8365662097930908, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.688981054253707, |
|
"kl": 0.0, |
|
"learning_rate": 3.738880167451596e-07, |
|
"logps/chosen": -194.8035125732422, |
|
"logps/rejected": -327.953369140625, |
|
"loss": 0.1177, |
|
"rewards/chosen": 0.040647171437740326, |
|
"rewards/margins": 2.5855929851531982, |
|
"rewards/rejected": -2.54494571685791, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.545945342907165, |
|
"kl": 0.0, |
|
"learning_rate": 3.7362637362637363e-07, |
|
"logps/chosen": -299.9187927246094, |
|
"logps/rejected": -232.39605712890625, |
|
"loss": 0.142, |
|
"rewards/chosen": 0.8184850811958313, |
|
"rewards/margins": 3.6689701080322266, |
|
"rewards/rejected": -2.85048508644104, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 12.552799610744065, |
|
"kl": 0.0, |
|
"learning_rate": 3.733647305075877e-07, |
|
"logps/chosen": -239.55209350585938, |
|
"logps/rejected": -253.3202667236328, |
|
"loss": 0.1485, |
|
"rewards/chosen": -0.060923367738723755, |
|
"rewards/margins": 1.103141188621521, |
|
"rewards/rejected": -1.1640645265579224, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 10.114942413331908, |
|
"kl": 0.0, |
|
"learning_rate": 3.731030873888016e-07, |
|
"logps/chosen": -188.8922882080078, |
|
"logps/rejected": -213.3092041015625, |
|
"loss": 0.1095, |
|
"rewards/chosen": 0.31580618023872375, |
|
"rewards/margins": 2.5469398498535156, |
|
"rewards/rejected": -2.2311336994171143, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 14.076487746859804, |
|
"kl": 0.0, |
|
"learning_rate": 3.7284144427001567e-07, |
|
"logps/chosen": -252.74615478515625, |
|
"logps/rejected": -206.2298583984375, |
|
"loss": 0.1541, |
|
"rewards/chosen": 0.32222241163253784, |
|
"rewards/margins": 2.381617307662964, |
|
"rewards/rejected": -2.0593948364257812, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 13.614522220242236, |
|
"kl": 0.0, |
|
"learning_rate": 3.725798011512297e-07, |
|
"logps/chosen": -233.2279052734375, |
|
"logps/rejected": -309.02667236328125, |
|
"loss": 0.1624, |
|
"rewards/chosen": 0.3280530273914337, |
|
"rewards/margins": 2.868494987487793, |
|
"rewards/rejected": -2.5404419898986816, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 13.04079850351043, |
|
"kl": 0.0, |
|
"learning_rate": 3.723181580324437e-07, |
|
"logps/chosen": -236.299560546875, |
|
"logps/rejected": -235.88479614257812, |
|
"loss": 0.2823, |
|
"rewards/chosen": -0.19492840766906738, |
|
"rewards/margins": 2.121401786804199, |
|
"rewards/rejected": -2.3163301944732666, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 13.854146801900269, |
|
"kl": 0.0, |
|
"learning_rate": 3.7205651491365777e-07, |
|
"logps/chosen": -220.61044311523438, |
|
"logps/rejected": -256.89471435546875, |
|
"loss": 0.2039, |
|
"rewards/chosen": 0.09335081279277802, |
|
"rewards/margins": 1.3945070505142212, |
|
"rewards/rejected": -1.3011562824249268, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.146701230600012, |
|
"kl": 0.0, |
|
"learning_rate": 3.7179487179487177e-07, |
|
"logps/chosen": -244.57260131835938, |
|
"logps/rejected": -216.97161865234375, |
|
"loss": 0.1947, |
|
"rewards/chosen": 0.42316800355911255, |
|
"rewards/margins": 1.4870796203613281, |
|
"rewards/rejected": -1.0639115571975708, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.730648049345493, |
|
"kl": 0.0, |
|
"learning_rate": 3.7153322867608577e-07, |
|
"logps/chosen": -218.7908935546875, |
|
"logps/rejected": -258.998291015625, |
|
"loss": 0.183, |
|
"rewards/chosen": -0.3176134526729584, |
|
"rewards/margins": 1.3556625843048096, |
|
"rewards/rejected": -1.6732760667800903, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.23814496690706, |
|
"kl": 0.0, |
|
"learning_rate": 3.712715855572998e-07, |
|
"logps/chosen": -223.61936950683594, |
|
"logps/rejected": -253.8956756591797, |
|
"loss": 0.1727, |
|
"rewards/chosen": 0.5035489201545715, |
|
"rewards/margins": 2.772200107574463, |
|
"rewards/rejected": -2.268651247024536, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.792135627932897, |
|
"kl": 0.0, |
|
"learning_rate": 3.7100994243851387e-07, |
|
"logps/chosen": -254.05380249023438, |
|
"logps/rejected": -232.8262939453125, |
|
"loss": 0.1926, |
|
"rewards/chosen": 1.061255693435669, |
|
"rewards/margins": 2.6245744228363037, |
|
"rewards/rejected": -1.5633187294006348, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.345332305270757, |
|
"kl": 0.0, |
|
"learning_rate": 3.707482993197279e-07, |
|
"logps/chosen": -232.64930725097656, |
|
"logps/rejected": -215.4774627685547, |
|
"loss": 0.1479, |
|
"rewards/chosen": 0.9204630851745605, |
|
"rewards/margins": 2.5953850746154785, |
|
"rewards/rejected": -1.674921989440918, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.930924815591139, |
|
"kl": 0.0, |
|
"learning_rate": 3.7048665620094186e-07, |
|
"logps/chosen": -273.54595947265625, |
|
"logps/rejected": -246.6627655029297, |
|
"loss": 0.2992, |
|
"rewards/chosen": -1.0437400341033936, |
|
"rewards/margins": 0.9890058040618896, |
|
"rewards/rejected": -2.032745838165283, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 9.386730828304604, |
|
"kl": 0.0, |
|
"learning_rate": 3.702250130821559e-07, |
|
"logps/chosen": -225.7566680908203, |
|
"logps/rejected": -239.1521759033203, |
|
"loss": 0.1486, |
|
"rewards/chosen": 0.671124279499054, |
|
"rewards/margins": 3.3144171237945557, |
|
"rewards/rejected": -2.6432929039001465, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.454753782186298, |
|
"kl": 0.0, |
|
"learning_rate": 3.6996336996336996e-07, |
|
"logps/chosen": -289.2835693359375, |
|
"logps/rejected": -271.740478515625, |
|
"loss": 0.2592, |
|
"rewards/chosen": -1.0335984230041504, |
|
"rewards/margins": 1.533452033996582, |
|
"rewards/rejected": -2.5670504570007324, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.975373879855518, |
|
"kl": 0.0, |
|
"learning_rate": 3.6970172684458396e-07, |
|
"logps/chosen": -213.38905334472656, |
|
"logps/rejected": -201.7144012451172, |
|
"loss": 0.1521, |
|
"rewards/chosen": 0.3211534321308136, |
|
"rewards/margins": 1.6761234998703003, |
|
"rewards/rejected": -1.354970097541809, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 10.925688675488383, |
|
"kl": 0.0, |
|
"learning_rate": 3.69440083725798e-07, |
|
"logps/chosen": -208.6687774658203, |
|
"logps/rejected": -211.62774658203125, |
|
"loss": 0.1478, |
|
"rewards/chosen": 1.2924268245697021, |
|
"rewards/margins": 3.0114760398864746, |
|
"rewards/rejected": -1.7190492153167725, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.744456658603918, |
|
"kl": 0.0, |
|
"learning_rate": 3.69178440607012e-07, |
|
"logps/chosen": -195.3672332763672, |
|
"logps/rejected": -232.05824279785156, |
|
"loss": 0.273, |
|
"rewards/chosen": -0.07960432767868042, |
|
"rewards/margins": 2.1937575340270996, |
|
"rewards/rejected": -2.273361921310425, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1911, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|