{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9983510322538092,
  "eval_steps": 400,
  "global_step": 473,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.002110678715124332,
      "grad_norm": 5.946747818020539,
      "learning_rate": 1.0416666666666666e-08,
      "logits/chosen": -0.680223822593689,
      "logits/rejected": -1.0431472063064575,
      "logps/chosen": -237.51611328125,
      "logps/rejected": -224.62709045410156,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.010553393575621661,
      "grad_norm": 4.788472074755126,
      "learning_rate": 5.208333333333333e-08,
      "logits/chosen": -0.605172336101532,
      "logits/rejected": -1.07023286819458,
      "logps/chosen": -302.76666259765625,
      "logps/rejected": -281.7729187011719,
      "loss": 0.693,
      "rewards/accuracies": 0.3515625,
      "rewards/chosen": 0.00022320327116176486,
      "rewards/margins": 8.367415284737945e-05,
      "rewards/rejected": 0.00013952911831438541,
      "step": 5
    },
    {
      "epoch": 0.021106787151243322,
      "grad_norm": 4.238337049927753,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -0.6779512763023376,
      "logits/rejected": -1.0506097078323364,
      "logps/chosen": -270.19256591796875,
      "logps/rejected": -256.5780029296875,
      "loss": 0.693,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -7.003974315011874e-05,
      "rewards/margins": -0.0007576555944979191,
      "rewards/rejected": 0.0006876158877275884,
      "step": 10
    },
    {
      "epoch": 0.03166018072686498,
      "grad_norm": 5.613762220129112,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -0.6236697435379028,
      "logits/rejected": -1.136690378189087,
      "logps/chosen": -294.53656005859375,
      "logps/rejected": -257.5579528808594,
      "loss": 0.693,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": 0.0019345388282090425,
      "rewards/margins": 0.0002388877037446946,
      "rewards/rejected": 0.0016956512117758393,
      "step": 15
    },
    {
      "epoch": 0.042213574302486644,
      "grad_norm": 5.5506034027854625,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -0.5656238794326782,
      "logits/rejected": -1.1458613872528076,
      "logps/chosen": -277.18695068359375,
      "logps/rejected": -242.61972045898438,
      "loss": 0.6923,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": 0.004845106042921543,
      "rewards/margins": 0.001295976573601365,
      "rewards/rejected": 0.0035491292364895344,
      "step": 20
    },
    {
      "epoch": 0.052766967878108306,
      "grad_norm": 5.6701208184749,
      "learning_rate": 2.604166666666667e-07,
      "logits/chosen": -0.5958523750305176,
      "logits/rejected": -1.1272190809249878,
      "logps/chosen": -305.53717041015625,
      "logps/rejected": -268.25347900390625,
      "loss": 0.691,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.015507316216826439,
      "rewards/margins": 0.0047125560231506824,
      "rewards/rejected": 0.010794761590659618,
      "step": 25
    },
    {
      "epoch": 0.06332036145372996,
      "grad_norm": 6.300590779192755,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -0.594957709312439,
      "logits/rejected": -1.086895227432251,
      "logps/chosen": -308.94012451171875,
      "logps/rejected": -276.35650634765625,
      "loss": 0.6879,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": 0.03275737911462784,
      "rewards/margins": 0.01083751954138279,
      "rewards/rejected": 0.02191985584795475,
      "step": 30
    },
    {
      "epoch": 0.07387375502935163,
      "grad_norm": 5.279197636228696,
      "learning_rate": 3.645833333333333e-07,
      "logits/chosen": -0.566003680229187,
      "logits/rejected": -1.1707605123519897,
      "logps/chosen": -305.409912109375,
      "logps/rejected": -279.29486083984375,
      "loss": 0.6838,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.05798542499542236,
      "rewards/margins": 0.023088786751031876,
      "rewards/rejected": 0.03489663824439049,
      "step": 35
    },
    {
      "epoch": 0.08442714860497329,
      "grad_norm": 4.841427121898649,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -0.6702648401260376,
      "logits/rejected": -1.063316822052002,
      "logps/chosen": -295.71905517578125,
      "logps/rejected": -275.6202087402344,
      "loss": 0.681,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": 0.06462486833333969,
      "rewards/margins": 0.02073287032544613,
      "rewards/rejected": 0.04389199614524841,
      "step": 40
    },
    {
      "epoch": 0.09498054218059494,
      "grad_norm": 4.337699418611637,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -0.7736238241195679,
      "logits/rejected": -1.1706254482269287,
      "logps/chosen": -272.80596923828125,
      "logps/rejected": -252.0467987060547,
      "loss": 0.6729,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.06801488995552063,
      "rewards/margins": 0.04081468656659126,
      "rewards/rejected": 0.027200212702155113,
      "step": 45
    },
    {
      "epoch": 0.10553393575621661,
      "grad_norm": 5.035982549882043,
      "learning_rate": 4.999726797933858e-07,
      "logits/chosen": -0.8857172131538391,
      "logits/rejected": -1.2423771619796753,
      "logps/chosen": -284.9183044433594,
      "logps/rejected": -266.7237854003906,
      "loss": 0.6673,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.03259050473570824,
      "rewards/margins": 0.05623316764831543,
      "rewards/rejected": -0.023642662912607193,
      "step": 50
    },
    {
      "epoch": 0.11608732933183827,
      "grad_norm": 5.2802868788136035,
      "learning_rate": 4.99665396039775e-07,
      "logits/chosen": -0.8473213315010071,
      "logits/rejected": -1.336387276649475,
      "logps/chosen": -276.6699523925781,
      "logps/rejected": -261.4330749511719,
      "loss": 0.6554,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.028915267437696457,
      "rewards/margins": 0.08701522648334503,
      "rewards/rejected": -0.11593051254749298,
      "step": 55
    },
    {
      "epoch": 0.12664072290745992,
      "grad_norm": 9.944712985206676,
      "learning_rate": 4.99017099386437e-07,
      "logits/chosen": -0.911908745765686,
      "logits/rejected": -1.4581682682037354,
      "logps/chosen": -287.9392395019531,
      "logps/rejected": -270.0166931152344,
      "loss": 0.6552,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.10472609847784042,
      "rewards/margins": 0.09941879659891129,
      "rewards/rejected": -0.20414488017559052,
      "step": 60
    },
    {
      "epoch": 0.13719411648308158,
      "grad_norm": 5.431017253590544,
      "learning_rate": 4.980286753286194e-07,
      "logits/chosen": -0.8732226490974426,
      "logits/rejected": -1.378962755203247,
      "logps/chosen": -294.68267822265625,
      "logps/rejected": -273.043701171875,
      "loss": 0.6509,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.15889093279838562,
      "rewards/margins": 0.0964367687702179,
      "rewards/rejected": -0.2553277015686035,
      "step": 65
    },
    {
      "epoch": 0.14774751005870326,
      "grad_norm": 6.0993507114780305,
      "learning_rate": 4.967014739346915e-07,
      "logits/chosen": -0.9360530972480774,
      "logits/rejected": -1.3933957815170288,
      "logps/chosen": -306.38653564453125,
      "logps/rejected": -282.32769775390625,
      "loss": 0.6403,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.14856548607349396,
      "rewards/margins": 0.1044367104768753,
      "rewards/rejected": -0.25300222635269165,
      "step": 70
    },
    {
      "epoch": 0.15830090363432492,
      "grad_norm": 6.805381088591199,
      "learning_rate": 4.950373080021136e-07,
      "logits/chosen": -0.9195225834846497,
      "logits/rejected": -1.3844249248504639,
      "logps/chosen": -316.6197509765625,
      "logps/rejected": -294.3293762207031,
      "loss": 0.6296,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.15873293578624725,
      "rewards/margins": 0.13153603672981262,
      "rewards/rejected": -0.2902689576148987,
      "step": 75
    },
    {
      "epoch": 0.16885429720994657,
      "grad_norm": 11.435330521176741,
      "learning_rate": 4.930384505813737e-07,
      "logits/chosen": -1.0555646419525146,
      "logits/rejected": -1.482975721359253,
      "logps/chosen": -320.57818603515625,
      "logps/rejected": -304.3819885253906,
      "loss": 0.6345,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.2796502113342285,
      "rewards/margins": 0.15378037095069885,
      "rewards/rejected": -0.43343058228492737,
      "step": 80
    },
    {
      "epoch": 0.17940769078556823,
      "grad_norm": 10.893293221473195,
      "learning_rate": 4.907076318712738e-07,
      "logits/chosen": -0.9902318120002747,
      "logits/rejected": -1.4965012073516846,
      "logps/chosen": -321.46295166015625,
      "logps/rejected": -311.6328125,
      "loss": 0.6289,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -0.3334459364414215,
      "rewards/margins": 0.1901421993970871,
      "rewards/rejected": -0.5235881209373474,
      "step": 85
    },
    {
      "epoch": 0.18996108436118989,
      "grad_norm": 7.423510047109125,
      "learning_rate": 4.88048035489807e-07,
      "logits/chosen": -0.9904667139053345,
      "logits/rejected": -1.5588419437408447,
      "logps/chosen": -308.77227783203125,
      "logps/rejected": -290.52130126953125,
      "loss": 0.6187,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -0.33037030696868896,
      "rewards/margins": 0.15141813457012177,
      "rewards/rejected": -0.4817884564399719,
      "step": 90
    },
    {
      "epoch": 0.20051447793681157,
      "grad_norm": 10.503540684222791,
      "learning_rate": 4.85063294125718e-07,
      "logits/chosen": -1.0931203365325928,
      "logits/rejected": -1.59712815284729,
      "logps/chosen": -327.8336486816406,
      "logps/rejected": -313.16302490234375,
      "loss": 0.616,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -0.4475630819797516,
      "rewards/margins": 0.1690167635679245,
      "rewards/rejected": -0.6165798306465149,
      "step": 95
    },
    {
      "epoch": 0.21106787151243322,
      "grad_norm": 7.941678777423352,
      "learning_rate": 4.817574845766874e-07,
      "logits/chosen": -1.1208244562149048,
      "logits/rejected": -1.5354535579681396,
      "logps/chosen": -351.09759521484375,
      "logps/rejected": -341.6310729980469,
      "loss": 0.6132,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.5545604825019836,
      "rewards/margins": 0.21660009026527405,
      "rewards/rejected": -0.7711606025695801,
      "step": 100
    },
    {
      "epoch": 0.22162126508805488,
      "grad_norm": 7.975671452180151,
      "learning_rate": 4.781351221809166e-07,
      "logits/chosen": -1.0387537479400635,
      "logits/rejected": -1.6272369623184204,
      "logps/chosen": -360.13323974609375,
      "logps/rejected": -349.0940856933594,
      "loss": 0.6115,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -0.5049761533737183,
      "rewards/margins": 0.21938514709472656,
      "rewards/rejected": -0.7243613004684448,
      "step": 105
    },
    {
      "epoch": 0.23217465866367654,
      "grad_norm": 8.034746512723116,
      "learning_rate": 4.742011546497182e-07,
      "logits/chosen": -1.1847660541534424,
      "logits/rejected": -1.6106970310211182,
      "logps/chosen": -334.15869140625,
      "logps/rejected": -328.2769470214844,
      "loss": 0.6055,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -0.4516691267490387,
      "rewards/margins": 0.2418258637189865,
      "rewards/rejected": -0.693494975566864,
      "step": 110
    },
    {
      "epoch": 0.2427280522392982,
      "grad_norm": 10.287751316470317,
      "learning_rate": 4.6996095530953875e-07,
      "logits/chosen": -1.339599370956421,
      "logits/rejected": -1.7150366306304932,
      "logps/chosen": -331.191162109375,
      "logps/rejected": -330.8086853027344,
      "loss": 0.5971,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -0.5352717638015747,
      "rewards/margins": 0.21640977263450623,
      "rewards/rejected": -0.7516815662384033,
      "step": 115
    },
    {
      "epoch": 0.25328144581491985,
      "grad_norm": 12.105502619722833,
      "learning_rate": 4.654203157626399e-07,
      "logits/chosen": -1.1615684032440186,
      "logits/rejected": -1.5826199054718018,
      "logps/chosen": -356.81915283203125,
      "logps/rejected": -361.2477722167969,
      "loss": 0.5989,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -0.7497292757034302,
      "rewards/margins": 0.2648738920688629,
      "rewards/rejected": -1.0146030187606812,
      "step": 120
    },
    {
      "epoch": 0.26383483939054153,
      "grad_norm": 11.641374719855692,
      "learning_rate": 4.605854379764673e-07,
      "logits/chosen": -1.228808045387268,
      "logits/rejected": -1.633094072341919,
      "logps/chosen": -376.2483825683594,
      "logps/rejected": -372.6545104980469,
      "loss": 0.5901,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -0.8822706937789917,
      "rewards/margins": 0.2663251757621765,
      "rewards/rejected": -1.1485958099365234,
      "step": 125
    },
    {
      "epoch": 0.27438823296616316,
      "grad_norm": 18.11761811423972,
      "learning_rate": 4.5546292581250857e-07,
      "logits/chosen": -1.2683994770050049,
      "logits/rejected": -1.7013158798217773,
      "logps/chosen": -393.85699462890625,
      "logps/rejected": -393.3715515136719,
      "loss": 0.5931,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.0844448804855347,
      "rewards/margins": 0.284206360578537,
      "rewards/rejected": -1.3686515092849731,
      "step": 130
    },
    {
      "epoch": 0.28494162654178484,
      "grad_norm": 11.265967096929554,
      "learning_rate": 4.5005977600621275e-07,
      "logits/chosen": -1.2262264490127563,
      "logits/rejected": -1.720760703086853,
      "logps/chosen": -390.81500244140625,
      "logps/rejected": -386.7849426269531,
      "loss": 0.577,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.1187413930892944,
      "rewards/margins": 0.2867385149002075,
      "rewards/rejected": -1.4054800271987915,
      "step": 135
    },
    {
      "epoch": 0.2954950201174065,
      "grad_norm": 13.458747198104215,
      "learning_rate": 4.443833686102919e-07,
      "logits/chosen": -1.2382609844207764,
      "logits/rejected": -1.6747297048568726,
      "logps/chosen": -434.0775451660156,
      "logps/rejected": -440.32598876953125,
      "loss": 0.5758,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.3505640029907227,
      "rewards/margins": 0.3462044298648834,
      "rewards/rejected": -1.6967684030532837,
      "step": 140
    },
    {
      "epoch": 0.30604841369302815,
      "grad_norm": 11.083230607187067,
      "learning_rate": 4.384414569144561e-07,
      "logits/chosen": -1.2794346809387207,
      "logits/rejected": -1.7727094888687134,
      "logps/chosen": -410.8555603027344,
      "logps/rejected": -412.8260803222656,
      "loss": 0.5823,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.3353734016418457,
      "rewards/margins": 0.34854626655578613,
      "rewards/rejected": -1.6839195489883423,
      "step": 145
    },
    {
      "epoch": 0.31660180726864984,
      "grad_norm": 9.628316005499197,
      "learning_rate": 4.3224215685535287e-07,
      "logits/chosen": -1.2036387920379639,
      "logits/rejected": -1.6848615407943726,
      "logps/chosen": -425.97796630859375,
      "logps/rejected": -431.17547607421875,
      "loss": 0.5789,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -1.3492869138717651,
      "rewards/margins": 0.35322755575180054,
      "rewards/rejected": -1.702514410018921,
      "step": 150
    },
    {
      "epoch": 0.32715520084427147,
      "grad_norm": 13.446042568474843,
      "learning_rate": 4.2579393593117364e-07,
      "logits/chosen": -1.2631900310516357,
      "logits/rejected": -1.6935676336288452,
      "logps/chosen": -405.7359619140625,
      "logps/rejected": -407.66510009765625,
      "loss": 0.5651,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.19693124294281,
      "rewards/margins": 0.2678401470184326,
      "rewards/rejected": -1.4647715091705322,
      "step": 155
    },
    {
      "epoch": 0.33770859441989315,
      "grad_norm": 11.045996690620928,
      "learning_rate": 4.191056016360699e-07,
      "logits/chosen": -1.3399848937988281,
      "logits/rejected": -1.648470163345337,
      "logps/chosen": -443.5994567871094,
      "logps/rejected": -469.8695373535156,
      "loss": 0.5645,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.471341848373413,
      "rewards/margins": 0.4636549949645996,
      "rewards/rejected": -1.9349968433380127,
      "step": 160
    },
    {
      "epoch": 0.34826198799551483,
      "grad_norm": 13.778566050110996,
      "learning_rate": 4.121862894301754e-07,
      "logits/chosen": -1.3236744403839111,
      "logits/rejected": -1.8871498107910156,
      "logps/chosen": -420.1505432128906,
      "logps/rejected": -415.3837890625,
      "loss": 0.5593,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.1470205783843994,
      "rewards/margins": 0.3792787194252014,
      "rewards/rejected": -1.526299238204956,
      "step": 165
    },
    {
      "epoch": 0.35881538157113646,
      "grad_norm": 17.339401498715958,
      "learning_rate": 4.050454502616667e-07,
      "logits/chosen": -1.3061871528625488,
      "logits/rejected": -1.6525911092758179,
      "logps/chosen": -458.53338623046875,
      "logps/rejected": -494.4827575683594,
      "loss": 0.5455,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.6396774053573608,
      "rewards/margins": 0.5506707429885864,
      "rewards/rejected": -2.1903481483459473,
      "step": 170
    },
    {
      "epoch": 0.36936877514675814,
      "grad_norm": 14.142408652824205,
      "learning_rate": 3.976928376579047e-07,
      "logits/chosen": -1.2991268634796143,
      "logits/rejected": -1.7594095468521118,
      "logps/chosen": -505.6692810058594,
      "logps/rejected": -530.7013549804688,
      "loss": 0.5295,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.9093101024627686,
      "rewards/margins": 0.5646597743034363,
      "rewards/rejected": -2.4739699363708496,
      "step": 175
    },
    {
      "epoch": 0.37992216872237977,
      "grad_norm": 12.91331776704347,
      "learning_rate": 3.9013849440328945e-07,
      "logits/chosen": -1.2874772548675537,
      "logits/rejected": -1.6723277568817139,
      "logps/chosen": -453.1542053222656,
      "logps/rejected": -469.8594665527344,
      "loss": 0.5487,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.7126022577285767,
      "rewards/margins": 0.4102846682071686,
      "rewards/rejected": -2.1228866577148438,
      "step": 180
    },
    {
      "epoch": 0.39047556229800146,
      "grad_norm": 13.372482096217857,
      "learning_rate": 3.8239273882202473e-07,
      "logits/chosen": -1.3676090240478516,
      "logits/rejected": -1.727839708328247,
      "logps/chosen": -470.1155700683594,
      "logps/rejected": -490.20196533203125,
      "loss": 0.5615,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.8151922225952148,
      "rewards/margins": 0.47195178270339966,
      "rewards/rejected": -2.287144184112549,
      "step": 185
    },
    {
      "epoch": 0.40102895587362314,
      "grad_norm": 19.85364607320994,
      "learning_rate": 3.7446615068452804e-07,
      "logits/chosen": -1.3825256824493408,
      "logits/rejected": -1.7227319478988647,
      "logps/chosen": -438.42333984375,
      "logps/rejected": -469.5264587402344,
      "loss": 0.5311,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.7287547588348389,
      "rewards/margins": 0.5413612723350525,
      "rewards/rejected": -2.2701163291931152,
      "step": 190
    },
    {
      "epoch": 0.41158234944924477,
      "grad_norm": 14.668927735081262,
      "learning_rate": 3.6636955675673743e-07,
      "logits/chosen": -1.2241297960281372,
      "logits/rejected": -1.7972831726074219,
      "logps/chosen": -444.69830322265625,
      "logps/rejected": -449.8846740722656,
      "loss": 0.5395,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -1.570357084274292,
      "rewards/margins": 0.42953139543533325,
      "rewards/rejected": -1.9998886585235596,
      "step": 195
    },
    {
      "epoch": 0.42213574302486645,
      "grad_norm": 12.855557980372362,
      "learning_rate": 3.5811401601205093e-07,
      "logits/chosen": -1.3073621988296509,
      "logits/rejected": -1.7404594421386719,
      "logps/chosen": -453.62286376953125,
      "logps/rejected": -476.1055603027344,
      "loss": 0.516,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -1.6229336261749268,
      "rewards/margins": 0.5728715062141418,
      "rewards/rejected": -2.195805072784424,
      "step": 200
    },
    {
      "epoch": 0.4326891366004881,
      "grad_norm": 14.723820955973457,
      "learning_rate": 3.497108045260995e-07,
      "logits/chosen": -1.3508846759796143,
      "logits/rejected": -1.8392765522003174,
      "logps/chosen": -464.30926513671875,
      "logps/rejected": -480.6266174316406,
      "loss": 0.5468,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.9108375310897827,
      "rewards/margins": 0.4916614592075348,
      "rewards/rejected": -2.402498960494995,
      "step": 205
    },
    {
      "epoch": 0.44324253017610976,
      "grad_norm": 17.043565936987463,
      "learning_rate": 3.411714000749838e-07,
      "logits/chosen": -1.374245047569275,
      "logits/rejected": -1.8004543781280518,
      "logps/chosen": -487.89776611328125,
      "logps/rejected": -509.8401794433594,
      "loss": 0.5286,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.9728209972381592,
      "rewards/margins": 0.46407732367515564,
      "rewards/rejected": -2.4368982315063477,
      "step": 210
    },
    {
      "epoch": 0.4537959237517314,
      "grad_norm": 24.760116135102702,
      "learning_rate": 3.3250746645801287e-07,
      "logits/chosen": -1.5120536088943481,
      "logits/rejected": -1.846258521080017,
      "logps/chosen": -495.29779052734375,
      "logps/rejected": -514.2366943359375,
      "loss": 0.5482,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.207698345184326,
      "rewards/margins": 0.46926626563072205,
      "rewards/rejected": -2.676964521408081,
      "step": 215
    },
    {
      "epoch": 0.4643493173273531,
      "grad_norm": 16.143780206492696,
      "learning_rate": 3.237308375663571e-07,
      "logits/chosen": -1.3949377536773682,
      "logits/rejected": -1.7159026861190796,
      "logps/chosen": -467.3490295410156,
      "logps/rejected": -508.98480224609375,
      "loss": 0.5367,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.080124855041504,
      "rewards/margins": 0.5874677896499634,
      "rewards/rejected": -2.6675925254821777,
      "step": 220
    },
    {
      "epoch": 0.47490271090297476,
      "grad_norm": 21.361838942730817,
      "learning_rate": 3.148535012193767e-07,
      "logits/chosen": -1.4045331478118896,
      "logits/rejected": -1.745840072631836,
      "logps/chosen": -509.15594482421875,
      "logps/rejected": -545.216064453125,
      "loss": 0.5216,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -2.0763394832611084,
      "rewards/margins": 0.5761786699295044,
      "rewards/rejected": -2.6525182723999023,
      "step": 225
    },
    {
      "epoch": 0.4854561044785964,
      "grad_norm": 14.456644926293217,
      "learning_rate": 3.0588758279070183e-07,
      "logits/chosen": -1.3919737339019775,
      "logits/rejected": -1.8210327625274658,
      "logps/chosen": -463.69171142578125,
      "logps/rejected": -494.8233947753906,
      "loss": 0.5352,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.015812397003174,
      "rewards/margins": 0.515998363494873,
      "rewards/rejected": -2.531810760498047,
      "step": 230
    },
    {
      "epoch": 0.49600949805421807,
      "grad_norm": 14.589143727916596,
      "learning_rate": 2.968453286464312e-07,
      "logits/chosen": -1.4191744327545166,
      "logits/rejected": -1.7561525106430054,
      "logps/chosen": -487.16912841796875,
      "logps/rejected": -515.9456787109375,
      "loss": 0.5387,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -1.961477279663086,
      "rewards/margins": 0.5488137602806091,
      "rewards/rejected": -2.51029109954834,
      "step": 235
    },
    {
      "epoch": 0.5065628916298397,
      "grad_norm": 14.508599811690631,
      "learning_rate": 2.8773908941806877e-07,
      "logits/chosen": -1.4843761920928955,
      "logits/rejected": -1.8826162815093994,
      "logps/chosen": -506.026123046875,
      "logps/rejected": -531.3427124023438,
      "loss": 0.5289,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -2.0544593334198,
      "rewards/margins": 0.5065608024597168,
      "rewards/rejected": -2.5610203742980957,
      "step": 240
    },
    {
      "epoch": 0.5171162852054614,
      "grad_norm": 18.773519116584172,
      "learning_rate": 2.785813031330473e-07,
      "logits/chosen": -1.443350076675415,
      "logits/rejected": -1.7564016580581665,
      "logps/chosen": -486.8233947753906,
      "logps/rejected": -526.447021484375,
      "loss": 0.5172,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -1.9753189086914062,
      "rewards/margins": 0.6450583338737488,
      "rewards/rejected": -2.6203773021698,
      "step": 245
    },
    {
      "epoch": 0.5276696787810831,
      "grad_norm": 18.389045572362814,
      "learning_rate": 2.693844782258779e-07,
      "logits/chosen": -1.510719656944275,
      "logits/rejected": -1.871638536453247,
      "logps/chosen": -502.90521240234375,
      "logps/rejected": -547.2249145507812,
      "loss": 0.4928,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -2.225248098373413,
      "rewards/margins": 0.747429370880127,
      "rewards/rejected": -2.972677707672119,
      "step": 250
    },
    {
      "epoch": 0.5382230723567047,
      "grad_norm": 16.8014567042791,
      "learning_rate": 2.601611764531342e-07,
      "logits/chosen": -1.4652379751205444,
      "logits/rejected": -1.7754548788070679,
      "logps/chosen": -507.2633361816406,
      "logps/rejected": -574.4364013671875,
      "loss": 0.5166,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -2.2433393001556396,
      "rewards/margins": 0.8110122680664062,
      "rewards/rejected": -3.054351329803467,
      "step": 255
    },
    {
      "epoch": 0.5487764659323263,
      "grad_norm": 13.726764888156497,
      "learning_rate": 2.5092399573560323e-07,
      "logits/chosen": -1.4230600595474243,
      "logits/rejected": -1.8214447498321533,
      "logps/chosen": -478.06817626953125,
      "logps/rejected": -509.787109375,
      "loss": 0.5249,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.9923759698867798,
      "rewards/margins": 0.5705231428146362,
      "rewards/rejected": -2.562898874282837,
      "step": 260
    },
    {
      "epoch": 0.559329859507948,
      "grad_norm": 23.65560305674686,
      "learning_rate": 2.4168555295104124e-07,
      "logits/chosen": -1.6187779903411865,
      "logits/rejected": -1.9773069620132446,
      "logps/chosen": -536.1935424804688,
      "logps/rejected": -572.4215698242188,
      "loss": 0.5145,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -2.2998838424682617,
      "rewards/margins": 0.6668075919151306,
      "rewards/rejected": -2.966691493988037,
      "step": 265
    },
    {
      "epoch": 0.5698832530835697,
      "grad_norm": 16.778556885067378,
      "learning_rate": 2.3245846670103626e-07,
      "logits/chosen": -1.6787500381469727,
      "logits/rejected": -2.1560122966766357,
      "logps/chosen": -517.3394775390625,
      "logps/rejected": -558.0335693359375,
      "loss": 0.4913,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -2.354661464691162,
      "rewards/margins": 0.7168524265289307,
      "rewards/rejected": -3.0715138912200928,
      "step": 270
    },
    {
      "epoch": 0.5804366466591914,
      "grad_norm": 17.263807575864956,
      "learning_rate": 2.232553400755159e-07,
      "logits/chosen": -1.6897958517074585,
      "logits/rejected": -2.048074960708618,
      "logps/chosen": -483.93780517578125,
      "logps/rejected": -522.008056640625,
      "loss": 0.5194,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -2.1912150382995605,
      "rewards/margins": 0.6004900932312012,
      "rewards/rejected": -2.791705369949341,
      "step": 275
    },
    {
      "epoch": 0.590990040234813,
      "grad_norm": 14.97406750721532,
      "learning_rate": 2.1408874343844294e-07,
      "logits/chosen": -1.5942695140838623,
      "logits/rejected": -1.9944686889648438,
      "logps/chosen": -510.0712890625,
      "logps/rejected": -560.1776123046875,
      "loss": 0.5099,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -2.1629281044006348,
      "rewards/margins": 0.6950211524963379,
      "rewards/rejected": -2.8579492568969727,
      "step": 280
    },
    {
      "epoch": 0.6015434338104346,
      "grad_norm": 15.808244586573563,
      "learning_rate": 2.049711972582101e-07,
      "logits/chosen": -1.5963830947875977,
      "logits/rejected": -1.9408931732177734,
      "logps/chosen": -526.6187744140625,
      "logps/rejected": -573.6658325195312,
      "loss": 0.5005,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -2.284257411956787,
      "rewards/margins": 0.7334116697311401,
      "rewards/rejected": -3.0176689624786377,
      "step": 285
    },
    {
      "epoch": 0.6120968273860563,
      "grad_norm": 32.53721598643591,
      "learning_rate": 1.9591515500618588e-07,
      "logits/chosen": -1.6634728908538818,
      "logits/rejected": -2.111931324005127,
      "logps/chosen": -564.41748046875,
      "logps/rejected": -615.9747314453125,
      "loss": 0.527,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -2.686408519744873,
      "rewards/margins": 0.7001844644546509,
      "rewards/rejected": -3.3865933418273926,
      "step": 290
    },
    {
      "epoch": 0.622650220961678,
      "grad_norm": 19.340424118071393,
      "learning_rate": 1.8693298614677112e-07,
      "logits/chosen": -1.658970594406128,
      "logits/rejected": -1.9972753524780273,
      "logps/chosen": -569.0438232421875,
      "logps/rejected": -618.6884765625,
      "loss": 0.4914,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -2.6297943592071533,
      "rewards/margins": 0.8370558619499207,
      "rewards/rejected": -3.4668502807617188,
      "step": 295
    },
    {
      "epoch": 0.6332036145372997,
      "grad_norm": 17.931673600276827,
      "learning_rate": 1.7803695924219814e-07,
      "logits/chosen": -1.5948402881622314,
      "logits/rejected": -2.000213146209717,
      "logps/chosen": -551.8944091796875,
      "logps/rejected": -608.9307250976562,
      "loss": 0.4796,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -2.4895987510681152,
      "rewards/margins": 0.8136266469955444,
      "rewards/rejected": -3.3032257556915283,
      "step": 300
    },
    {
      "epoch": 0.6437570081129214,
      "grad_norm": 18.41740038504812,
      "learning_rate": 1.6923922519515067e-07,
      "logits/chosen": -1.5580310821533203,
      "logits/rejected": -2.058936595916748,
      "logps/chosen": -568.0121459960938,
      "logps/rejected": -599.84765625,
      "loss": 0.5062,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -2.7541260719299316,
      "rewards/margins": 0.6605782508850098,
      "rewards/rejected": -3.4147045612335205,
      "step": 305
    },
    {
      "epoch": 0.6543104016885429,
      "grad_norm": 18.94630227910009,
      "learning_rate": 1.605518006520924e-07,
      "logits/chosen": -1.5944321155548096,
      "logits/rejected": -1.9596939086914062,
      "logps/chosen": -544.1249389648438,
      "logps/rejected": -598.9169921875,
      "loss": 0.5156,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -2.5852460861206055,
      "rewards/margins": 0.7831751108169556,
      "rewards/rejected": -3.3684210777282715,
      "step": 310
    },
    {
      "epoch": 0.6648637952641646,
      "grad_norm": 17.846645357695152,
      "learning_rate": 1.519865515899731e-07,
      "logits/chosen": -1.6307493448257446,
      "logits/rejected": -2.0079474449157715,
      "logps/chosen": -536.2100830078125,
      "logps/rejected": -575.9131469726562,
      "loss": 0.5119,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -2.501380443572998,
      "rewards/margins": 0.6081604361534119,
      "rewards/rejected": -3.1095407009124756,
      "step": 315
    },
    {
      "epoch": 0.6754171888397863,
      "grad_norm": 17.811680184641038,
      "learning_rate": 1.4355517710873182e-07,
      "logits/chosen": -1.6296656131744385,
      "logits/rejected": -2.132188558578491,
      "logps/chosen": -553.8519287109375,
      "logps/rejected": -594.5834350585938,
      "loss": 0.5043,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.6355135440826416,
      "rewards/margins": 0.7011954188346863,
      "rewards/rejected": -3.3367092609405518,
      "step": 320
    },
    {
      "epoch": 0.685970582415408,
      "grad_norm": 22.014031239903254,
      "learning_rate": 1.3526919345173318e-07,
      "logits/chosen": -1.7465054988861084,
      "logits/rejected": -2.223958969116211,
      "logps/chosen": -591.5750732421875,
      "logps/rejected": -645.5902709960938,
      "loss": 0.5127,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -3.071688413619995,
      "rewards/margins": 0.8220219612121582,
      "rewards/rejected": -3.8937106132507324,
      "step": 325
    },
    {
      "epoch": 0.6965239759910297,
      "grad_norm": 18.950149468318013,
      "learning_rate": 1.2713991827596443e-07,
      "logits/chosen": -1.8067715167999268,
      "logits/rejected": -2.2385947704315186,
      "logps/chosen": -603.2128295898438,
      "logps/rejected": -631.5573120117188,
      "loss": 0.5004,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -2.8057212829589844,
      "rewards/margins": 0.7445579767227173,
      "rewards/rejected": -3.5502796173095703,
      "step": 330
    },
    {
      "epoch": 0.7070773695666512,
      "grad_norm": 19.529222069564966,
      "learning_rate": 1.191784551934773e-07,
      "logits/chosen": -1.6992321014404297,
      "logits/rejected": -2.138716697692871,
      "logps/chosen": -544.4804077148438,
      "logps/rejected": -600.5407104492188,
      "loss": 0.495,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -2.6194682121276855,
      "rewards/margins": 0.8088088035583496,
      "rewards/rejected": -3.428276777267456,
      "step": 335
    },
    {
      "epoch": 0.7176307631422729,
      "grad_norm": 18.18480447945621,
      "learning_rate": 1.1139567860518953e-07,
      "logits/chosen": -1.7028363943099976,
      "logits/rejected": -2.206139326095581,
      "logps/chosen": -540.541748046875,
      "logps/rejected": -595.2509155273438,
      "loss": 0.4766,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -2.7220633029937744,
      "rewards/margins": 0.8165687322616577,
      "rewards/rejected": -3.5386319160461426,
      "step": 340
    },
    {
      "epoch": 0.7281841567178946,
      "grad_norm": 24.785455969446467,
      "learning_rate": 1.0380221884776128e-07,
      "logits/chosen": -1.6632766723632812,
      "logits/rejected": -2.1659774780273438,
      "logps/chosen": -634.0614624023438,
      "logps/rejected": -691.1161499023438,
      "loss": 0.4602,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -3.2313294410705566,
      "rewards/margins": 0.9306994676589966,
      "rewards/rejected": -4.1620283126831055,
      "step": 345
    },
    {
      "epoch": 0.7387375502935163,
      "grad_norm": 21.96462703390364,
      "learning_rate": 9.640844767383405e-08,
      "logits/chosen": -1.7271251678466797,
      "logits/rejected": -2.200099468231201,
      "logps/chosen": -614.051025390625,
      "logps/rejected": -659.74853515625,
      "loss": 0.524,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -3.4554572105407715,
      "rewards/margins": 0.7755366563796997,
      "rewards/rejected": -4.23099422454834,
      "step": 350
    },
    {
      "epoch": 0.749290943869138,
      "grad_norm": 19.92963560086107,
      "learning_rate": 8.922446408546378e-08,
      "logits/chosen": -1.659334421157837,
      "logits/rejected": -2.0364480018615723,
      "logps/chosen": -569.6084594726562,
      "logps/rejected": -638.3173828125,
      "loss": 0.4554,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -2.770272731781006,
      "rewards/margins": 0.9671792984008789,
      "rewards/rejected": -3.7374520301818848,
      "step": 355
    },
    {
      "epoch": 0.7598443374447595,
      "grad_norm": 17.97428748366273,
      "learning_rate": 8.22600805400994e-08,
      "logits/chosen": -1.5649785995483398,
      "logits/rejected": -2.075366735458374,
      "logps/chosen": -527.646484375,
      "logps/rejected": -579.1947021484375,
      "loss": 0.4778,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -2.405059576034546,
      "rewards/margins": 0.786846399307251,
      "rewards/rejected": -3.191905975341797,
      "step": 360
    },
    {
      "epoch": 0.7703977310203812,
      "grad_norm": 25.05638368769761,
      "learning_rate": 7.552480954794558e-08,
      "logits/chosen": -1.6564147472381592,
      "logits/rejected": -2.168598175048828,
      "logps/chosen": -607.8692626953125,
      "logps/rejected": -662.06494140625,
      "loss": 0.4862,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -3.0341908931732178,
      "rewards/margins": 0.822891116142273,
      "rewards/rejected": -3.857081890106201,
      "step": 365
    },
    {
      "epoch": 0.7809511245960029,
      "grad_norm": 22.12728668677665,
      "learning_rate": 6.902785067901854e-08,
      "logits/chosen": -1.7829809188842773,
      "logits/rejected": -2.062894105911255,
      "logps/chosen": -574.40673828125,
      "logps/rejected": -647.600830078125,
      "loss": 0.491,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -2.980964183807373,
      "rewards/margins": 0.8619121313095093,
      "rewards/rejected": -3.84287691116333,
      "step": 370
    },
    {
      "epoch": 0.7915045181716246,
      "grad_norm": 19.35197929971672,
      "learning_rate": 6.277807799763973e-08,
      "logits/chosen": -1.6468517780303955,
      "logits/rejected": -2.1115472316741943,
      "logps/chosen": -583.3654174804688,
      "logps/rejected": -647.2584228515625,
      "loss": 0.4817,
      "rewards/accuracies": 0.831250011920929,
      "rewards/chosen": -2.734621524810791,
      "rewards/margins": 0.9541465044021606,
      "rewards/rejected": -3.688767910003662,
      "step": 375
    },
    {
      "epoch": 0.8020579117472463,
      "grad_norm": 25.507938089288015,
      "learning_rate": 5.678402794153145e-08,
      "logits/chosen": -1.7440725564956665,
      "logits/rejected": -2.137272357940674,
      "logps/chosen": -546.3848876953125,
      "logps/rejected": -603.1316528320312,
      "loss": 0.4873,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -2.7099640369415283,
      "rewards/margins": 0.749721884727478,
      "rewards/rejected": -3.459686279296875,
      "step": 380
    },
    {
      "epoch": 0.8126113053228678,
      "grad_norm": 23.303144237592594,
      "learning_rate": 5.105388766206969e-08,
      "logits/chosen": -1.7952743768692017,
      "logits/rejected": -2.218257188796997,
      "logps/chosen": -593.2301635742188,
      "logps/rejected": -648.0328979492188,
      "loss": 0.4938,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -2.9787325859069824,
      "rewards/margins": 0.8751193284988403,
      "rewards/rejected": -3.853851318359375,
      "step": 385
    },
    {
      "epoch": 0.8231646988984895,
      "grad_norm": 18.27389690660161,
      "learning_rate": 4.5595483841620484e-08,
      "logits/chosen": -1.7571223974227905,
      "logits/rejected": -2.144810914993286,
      "logps/chosen": -578.3859252929688,
      "logps/rejected": -649.9483642578125,
      "loss": 0.5063,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -3.023244619369507,
      "rewards/margins": 0.9022436141967773,
      "rewards/rejected": -3.925487995147705,
      "step": 390
    },
    {
      "epoch": 0.8337180924741112,
      "grad_norm": 19.91862919187348,
      "learning_rate": 4.0416272003232526e-08,
      "logits/chosen": -1.6357412338256836,
      "logits/rejected": -2.0107734203338623,
      "logps/chosen": -543.9173583984375,
      "logps/rejected": -597.3966064453125,
      "loss": 0.4669,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -2.689850330352783,
      "rewards/margins": 0.8007882833480835,
      "rewards/rejected": -3.4906387329101562,
      "step": 395
    },
    {
      "epoch": 0.8442714860497329,
      "grad_norm": 22.868215506762866,
      "learning_rate": 3.552332632729041e-08,
      "logits/chosen": -1.7554508447647095,
      "logits/rejected": -2.100292444229126,
      "logps/chosen": -604.8919067382812,
      "logps/rejected": -656.6446533203125,
      "loss": 0.4967,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -3.00014591217041,
      "rewards/margins": 0.7955247163772583,
      "rewards/rejected": -3.795670986175537,
      "step": 400
    },
    {
      "epoch": 0.8442714860497329,
      "eval_logits/chosen": -2.6870288848876953,
      "eval_logits/rejected": -2.53877329826355,
      "eval_logps/chosen": -555.4608154296875,
      "eval_logps/rejected": -610.8331298828125,
      "eval_loss": 0.6258421540260315,
      "eval_rewards/accuracies": 0.6639676094055176,
      "eval_rewards/chosen": -2.919989824295044,
      "eval_rewards/margins": 0.4445248544216156,
      "eval_rewards/rejected": -3.3645148277282715,
      "eval_runtime": 305.2745,
      "eval_samples_per_second": 6.473,
      "eval_steps_per_second": 0.809,
      "step": 400
    },
    {
      "epoch": 0.8548248796253546,
      "grad_norm": 18.976507998784875,
      "learning_rate": 3.092332998903416e-08,
      "logits/chosen": -1.6837856769561768,
      "logits/rejected": -2.183306932449341,
      "logps/chosen": -543.5635375976562,
      "logps/rejected": -615.6289672851562,
      "loss": 0.4408,
      "rewards/accuracies": 0.8125,
      "rewards/chosen": -2.6628594398498535,
      "rewards/margins": 1.0260940790176392,
      "rewards/rejected": -3.688953399658203,
      "step": 405
    },
    {
      "epoch": 0.8653782732009762,
      "grad_norm": 20.75221237108568,
      "learning_rate": 2.6622566030146455e-08,
      "logits/chosen": -1.7799179553985596,
      "logits/rejected": -2.1815567016601562,
      "logps/chosen": -591.3876953125,
      "logps/rejected": -658.4329223632812,
      "loss": 0.4724,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -3.0632710456848145,
      "rewards/margins": 0.9336662292480469,
      "rewards/rejected": -3.9969372749328613,
      "step": 410
    },
    {
      "epoch": 0.8759316667765978,
      "grad_norm": 20.339013632703708,
      "learning_rate": 2.26269087768734e-08,
      "logits/chosen": -1.7808520793914795,
      "logits/rejected": -2.20698881149292,
      "logps/chosen": -610.84716796875,
      "logps/rejected": -696.8916015625,
      "loss": 0.4667,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -3.1746575832366943,
      "rewards/margins": 1.094146490097046,
      "rewards/rejected": -4.268804550170898,
      "step": 415
    },
    {
      "epoch": 0.8864850603522195,
      "grad_norm": 21.448619329065966,
      "learning_rate": 1.894181581640106e-08,
      "logits/chosen": -1.7119935750961304,
      "logits/rejected": -2.181185722351074,
      "logps/chosen": -640.0302734375,
      "logps/rejected": -725.7114868164062,
      "loss": 0.4371,
      "rewards/accuracies": 0.793749988079071,
      "rewards/chosen": -3.318742036819458,
      "rewards/margins": 1.2371790409088135,
      "rewards/rejected": -4.555922031402588,
      "step": 420
    },
    {
      "epoch": 0.8970384539278412,
      "grad_norm": 31.199200608117586,
      "learning_rate": 1.5572320542448143e-08,
      "logits/chosen": -1.8320446014404297,
      "logits/rejected": -2.2155635356903076,
      "logps/chosen": -637.1694946289062,
      "logps/rejected": -691.4876708984375,
      "loss": 0.4918,
      "rewards/accuracies": 0.731249988079071,
      "rewards/chosen": -3.344149112701416,
      "rewards/margins": 0.7779822945594788,
      "rewards/rejected": -4.12213134765625,
      "step": 425
    },
    {
      "epoch": 0.9075918475034628,
      "grad_norm": 26.35733359263408,
      "learning_rate": 1.2523025280255729e-08,
      "logits/chosen": -1.8720299005508423,
      "logits/rejected": -2.2721853256225586,
      "logps/chosen": -598.7555541992188,
      "logps/rejected": -676.6019287109375,
      "loss": 0.4668,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -3.2352309226989746,
      "rewards/margins": 1.1256295442581177,
      "rewards/rejected": -4.360860347747803,
      "step": 430
    },
    {
      "epoch": 0.9181452410790845,
      "grad_norm": 19.592377697948315,
      "learning_rate": 9.798095000364214e-09,
      "logits/chosen": -1.8192625045776367,
      "logits/rejected": -2.1697540283203125,
      "logps/chosen": -593.2144775390625,
      "logps/rejected": -647.7749633789062,
      "loss": 0.5005,
      "rewards/accuracies": 0.8062499761581421,
      "rewards/chosen": -3.3029427528381348,
      "rewards/margins": 0.7348226308822632,
      "rewards/rejected": -4.0377655029296875,
      "step": 435
    },
    {
      "epoch": 0.9286986346547061,
      "grad_norm": 18.25740720427561,
      "learning_rate": 7.401251629764876e-09,
      "logits/chosen": -1.8516120910644531,
      "logits/rejected": -2.2366607189178467,
      "logps/chosen": -612.3639526367188,
      "logps/rejected": -662.9229736328125,
      "loss": 0.4957,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -3.2708640098571777,
      "rewards/margins": 0.781753420829773,
      "rewards/rejected": -4.052617073059082,
      "step": 440
    },
    {
      "epoch": 0.9392520282303278,
      "grad_norm": 21.707645823041325,
      "learning_rate": 5.335768968195098e-09,
      "logits/chosen": -1.822199821472168,
      "logits/rejected": -2.2232327461242676,
      "logps/chosen": -590.3006591796875,
      "logps/rejected": -674.7203369140625,
      "loss": 0.4531,
      "rewards/accuracies": 0.8187500238418579,
      "rewards/chosen": -3.1192424297332764,
      "rewards/margins": 1.0382407903671265,
      "rewards/rejected": -4.1574835777282715,
      "step": 445
    },
    {
      "epoch": 0.9498054218059495,
      "grad_norm": 27.24335169269175,
      "learning_rate": 3.604468216521883e-09,
      "logits/chosen": -1.8075069189071655,
      "logits/rejected": -2.1066629886627197,
      "logps/chosen": -587.6980590820312,
      "logps/rejected": -656.5093994140625,
      "loss": 0.4569,
      "rewards/accuracies": 0.78125,
      "rewards/chosen": -3.0228023529052734,
      "rewards/margins": 0.8714979290962219,
      "rewards/rejected": -3.8943004608154297,
      "step": 450
    },
    {
      "epoch": 0.9603588153815711,
      "grad_norm": 30.5949031304229,
      "learning_rate": 2.2097141233206884e-09,
      "logits/chosen": -1.764469861984253,
      "logits/rejected": -2.0643749237060547,
      "logps/chosen": -625.1185302734375,
      "logps/rejected": -686.2703857421875,
      "loss": 0.4951,
      "rewards/accuracies": 0.7562500238418579,
      "rewards/chosen": -3.299448013305664,
      "rewards/margins": 0.8464337587356567,
      "rewards/rejected": -4.1458821296691895,
      "step": 455
    },
    {
      "epoch": 0.9709122089571928,
      "grad_norm": 16.854970901618884,
      "learning_rate": 1.1534117549133472e-09,
      "logits/chosen": -1.8324248790740967,
      "logits/rejected": -2.1692023277282715,
      "logps/chosen": -600.291015625,
      "logps/rejected": -676.2047119140625,
      "loss": 0.4721,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": -3.200017213821411,
      "rewards/margins": 0.9616624712944031,
      "rewards/rejected": -4.161679267883301,
      "step": 460
    },
    {
      "epoch": 0.9814656025328145,
      "grad_norm": 20.996442076100582,
      "learning_rate": 4.3700389327672173e-10,
      "logits/chosen": -1.6799993515014648,
      "logits/rejected": -2.161038398742676,
      "logps/chosen": -642.9143676757812,
      "logps/rejected": -710.483154296875,
      "loss": 0.4775,
      "rewards/accuracies": 0.84375,
      "rewards/chosen": -3.233483076095581,
      "rewards/margins": 1.045021414756775,
      "rewards/rejected": -4.278504371643066,
      "step": 465
    },
    {
      "epoch": 0.9920189961084361,
      "grad_norm": 31.480125798687133,
      "learning_rate": 6.146906537587982e-11,
      "logits/chosen": -1.7645127773284912,
      "logits/rejected": -2.224125385284424,
      "logps/chosen": -594.4464721679688,
      "logps/rejected": -659.8152465820312,
      "loss": 0.4866,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -2.988931179046631,
      "rewards/margins": 1.064064383506775,
      "rewards/rejected": -4.052995204925537,
      "step": 470
    },
    {
      "epoch": 0.9983510322538092,
      "step": 473,
      "total_flos": 0.0,
      "train_loss": 0.5460537698505796,
      "train_runtime": 22947.8791,
      "train_samples_per_second": 2.643,
      "train_steps_per_second": 0.021
    }
  ],
  "logging_steps": 5,
  "max_steps": 473,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}