|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 47.431165475675876, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0093685388565063, |
|
"logits/rejected": -0.9801958203315735, |
|
"logps/chosen": -0.27410927414894104, |
|
"logps/rejected": -0.2716289460659027, |
|
"loss": 3.0415, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -2.7410929203033447, |
|
"rewards/margins": -0.024803416803479195, |
|
"rewards/rejected": -2.7162892818450928, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 41.239549738008755, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0442249774932861, |
|
"logits/rejected": -0.9774014353752136, |
|
"logps/chosen": -0.29465872049331665, |
|
"logps/rejected": -0.29961076378822327, |
|
"loss": 3.0332, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.946587085723877, |
|
"rewards/margins": 0.04952071234583855, |
|
"rewards/rejected": -2.996107578277588, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 43.03376486032676, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9605627059936523, |
|
"logits/rejected": -0.9794631004333496, |
|
"logps/chosen": -0.26438188552856445, |
|
"logps/rejected": -0.30044788122177124, |
|
"loss": 3.047, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.6438186168670654, |
|
"rewards/margins": 0.36066022515296936, |
|
"rewards/rejected": -3.004478931427002, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 55.729867353234994, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9706584215164185, |
|
"logits/rejected": -0.9438554048538208, |
|
"logps/chosen": -0.2775923013687134, |
|
"logps/rejected": -0.29160231351852417, |
|
"loss": 2.9682, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.775923252105713, |
|
"rewards/margins": 0.14010018110275269, |
|
"rewards/rejected": -2.916023015975952, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 53.66865278898649, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.0153733491897583, |
|
"logits/rejected": -0.9855473637580872, |
|
"logps/chosen": -0.2714543342590332, |
|
"logps/rejected": -0.2778756022453308, |
|
"loss": 3.1483, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.714543104171753, |
|
"rewards/margins": 0.0642128437757492, |
|
"rewards/rejected": -2.7787561416625977, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 43.4911593109138, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -1.0054885149002075, |
|
"logits/rejected": -0.9609144330024719, |
|
"logps/chosen": -0.27275633811950684, |
|
"logps/rejected": -0.2785263657569885, |
|
"loss": 2.9455, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -2.7275633811950684, |
|
"rewards/margins": 0.05770007520914078, |
|
"rewards/rejected": -2.7852635383605957, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 62.11467926433348, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.048362374305725, |
|
"logits/rejected": -0.9739812612533569, |
|
"logps/chosen": -0.2923530042171478, |
|
"logps/rejected": -0.31766629219055176, |
|
"loss": 2.9148, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -2.923529863357544, |
|
"rewards/margins": 0.253133088350296, |
|
"rewards/rejected": -3.1766631603240967, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 66.19354446783973, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.027752161026001, |
|
"logits/rejected": -0.982993483543396, |
|
"logps/chosen": -0.28172561526298523, |
|
"logps/rejected": -0.3218705952167511, |
|
"loss": 2.9039, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.817256212234497, |
|
"rewards/margins": 0.40144944190979004, |
|
"rewards/rejected": -3.218705654144287, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 40.808829184419544, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.064257025718689, |
|
"logits/rejected": -1.022287130355835, |
|
"logps/chosen": -0.33324795961380005, |
|
"logps/rejected": -0.3929290771484375, |
|
"loss": 2.9539, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.332479476928711, |
|
"rewards/margins": 0.5968114137649536, |
|
"rewards/rejected": -3.929290771484375, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 51.85470904227569, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.0401779413223267, |
|
"logits/rejected": -0.990446925163269, |
|
"logps/chosen": -0.321146696805954, |
|
"logps/rejected": -0.37150952219963074, |
|
"loss": 2.9887, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -3.2114670276641846, |
|
"rewards/margins": 0.5036287307739258, |
|
"rewards/rejected": -3.7150955200195312, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 75.50941924220831, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.0736358165740967, |
|
"logits/rejected": -1.0379724502563477, |
|
"logps/chosen": -0.28898900747299194, |
|
"logps/rejected": -0.3547995686531067, |
|
"loss": 2.7366, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.88988995552063, |
|
"rewards/margins": 0.658105731010437, |
|
"rewards/rejected": -3.5479958057403564, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 53.536746683672696, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.1059926748275757, |
|
"logits/rejected": -1.0733429193496704, |
|
"logps/chosen": -0.3318749964237213, |
|
"logps/rejected": -0.3473610281944275, |
|
"loss": 2.8777, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -3.3187499046325684, |
|
"rewards/margins": 0.15486064553260803, |
|
"rewards/rejected": -3.4736104011535645, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 57.894224289101096, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -1.0196824073791504, |
|
"logits/rejected": -0.9911971092224121, |
|
"logps/chosen": -0.3787004351615906, |
|
"logps/rejected": -0.45460987091064453, |
|
"loss": 2.7406, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -3.787003993988037, |
|
"rewards/margins": 0.7590948343276978, |
|
"rewards/rejected": -4.5460991859436035, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 40.680575216186135, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.0517082214355469, |
|
"logits/rejected": -1.0281683206558228, |
|
"logps/chosen": -0.3594748377799988, |
|
"logps/rejected": -0.4522577226161957, |
|
"loss": 2.8528, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -3.594748020172119, |
|
"rewards/margins": 0.9278289675712585, |
|
"rewards/rejected": -4.522576808929443, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 58.56175042188981, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -1.0064220428466797, |
|
"logits/rejected": -0.9376434087753296, |
|
"logps/chosen": -0.36788100004196167, |
|
"logps/rejected": -0.42331838607788086, |
|
"loss": 2.7642, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.6788101196289062, |
|
"rewards/margins": 0.5543740391731262, |
|
"rewards/rejected": -4.233183860778809, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 47.18670280685341, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9864493608474731, |
|
"logits/rejected": -0.9741519689559937, |
|
"logps/chosen": -0.3633944094181061, |
|
"logps/rejected": -0.47577548027038574, |
|
"loss": 2.6618, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.633944272994995, |
|
"rewards/margins": 1.1238101720809937, |
|
"rewards/rejected": -4.757754325866699, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 56.89275049644848, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.9944761395454407, |
|
"logits/rejected": -0.9737129211425781, |
|
"logps/chosen": -0.348540723323822, |
|
"logps/rejected": -0.4202929437160492, |
|
"loss": 2.5907, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -3.4854073524475098, |
|
"rewards/margins": 0.7175217866897583, |
|
"rewards/rejected": -4.2029290199279785, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 64.95639784576333, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.039825677871704, |
|
"logits/rejected": -1.0079519748687744, |
|
"logps/chosen": -0.4592970013618469, |
|
"logps/rejected": -0.5395578145980835, |
|
"loss": 2.8586, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -4.5929694175720215, |
|
"rewards/margins": 0.8026081919670105, |
|
"rewards/rejected": -5.395577430725098, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 58.58608272426839, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.1195189952850342, |
|
"logits/rejected": -1.0406802892684937, |
|
"logps/chosen": -0.47035104036331177, |
|
"logps/rejected": -0.5184075236320496, |
|
"loss": 2.6804, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -4.703510284423828, |
|
"rewards/margins": 0.4805658459663391, |
|
"rewards/rejected": -5.184075832366943, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 65.6357725034444, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -1.018027901649475, |
|
"logits/rejected": -0.9948932528495789, |
|
"logps/chosen": -0.4622405171394348, |
|
"logps/rejected": -0.528734564781189, |
|
"loss": 2.7539, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -4.622405052185059, |
|
"rewards/margins": 0.6649408340454102, |
|
"rewards/rejected": -5.287345886230469, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 60.841137599337706, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -1.0319901704788208, |
|
"logits/rejected": -0.9800949096679688, |
|
"logps/chosen": -0.43034663796424866, |
|
"logps/rejected": -0.5452305674552917, |
|
"loss": 2.6903, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -4.303465843200684, |
|
"rewards/margins": 1.1488397121429443, |
|
"rewards/rejected": -5.452306270599365, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 66.66409649647449, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.985127329826355, |
|
"logits/rejected": -0.9267324209213257, |
|
"logps/chosen": -0.48107171058654785, |
|
"logps/rejected": -0.6188473701477051, |
|
"loss": 2.6025, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -4.8107171058654785, |
|
"rewards/margins": 1.3777568340301514, |
|
"rewards/rejected": -6.188473701477051, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 79.76554466353629, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.0459407567977905, |
|
"logits/rejected": -0.987104058265686, |
|
"logps/chosen": -0.5227265357971191, |
|
"logps/rejected": -0.5985993146896362, |
|
"loss": 2.4679, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -5.227265357971191, |
|
"rewards/margins": 0.7587274312973022, |
|
"rewards/rejected": -5.985992908477783, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 71.0699167092581, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -1.0006247758865356, |
|
"logits/rejected": -0.9123749732971191, |
|
"logps/chosen": -0.5375245213508606, |
|
"logps/rejected": -0.7409927845001221, |
|
"loss": 2.3789, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -5.375245094299316, |
|
"rewards/margins": 2.0346832275390625, |
|
"rewards/rejected": -7.409928321838379, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 71.80653378932202, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.0720800161361694, |
|
"logits/rejected": -1.0307856798171997, |
|
"logps/chosen": -0.6043092608451843, |
|
"logps/rejected": -0.7186105847358704, |
|
"loss": 2.2873, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -6.043093681335449, |
|
"rewards/margins": 1.1430130004882812, |
|
"rewards/rejected": -7.186106204986572, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 76.52309149449971, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.0935930013656616, |
|
"logits/rejected": -1.087660551071167, |
|
"logps/chosen": -0.6125985383987427, |
|
"logps/rejected": -0.9003360867500305, |
|
"loss": 2.0828, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -6.125986099243164, |
|
"rewards/margins": 2.8773741722106934, |
|
"rewards/rejected": -9.003360748291016, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 74.74165844394271, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.0836272239685059, |
|
"logits/rejected": -1.0363242626190186, |
|
"logps/chosen": -0.705600917339325, |
|
"logps/rejected": -0.8650037050247192, |
|
"loss": 2.1181, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -7.056008815765381, |
|
"rewards/margins": 1.5940272808074951, |
|
"rewards/rejected": -8.650036811828613, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 82.01125840970528, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.1484724283218384, |
|
"logits/rejected": -1.1269563436508179, |
|
"logps/chosen": -0.8566378355026245, |
|
"logps/rejected": -1.011114239692688, |
|
"loss": 2.0912, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -8.566378593444824, |
|
"rewards/margins": 1.5447633266448975, |
|
"rewards/rejected": -10.1111421585083, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 85.34384972694933, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -1.0586411952972412, |
|
"logits/rejected": -1.0339049100875854, |
|
"logps/chosen": -0.8592250943183899, |
|
"logps/rejected": -1.117244005203247, |
|
"loss": 1.9373, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -8.592249870300293, |
|
"rewards/margins": 2.5801892280578613, |
|
"rewards/rejected": -11.172440528869629, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 83.57155238411846, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.0752789974212646, |
|
"logits/rejected": -1.057715654373169, |
|
"logps/chosen": -0.9481467008590698, |
|
"logps/rejected": -1.1904191970825195, |
|
"loss": 1.9564, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -9.481468200683594, |
|
"rewards/margins": 2.422724723815918, |
|
"rewards/rejected": -11.904191970825195, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 87.97325530333627, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.084198236465454, |
|
"logits/rejected": -1.0641534328460693, |
|
"logps/chosen": -1.0298274755477905, |
|
"logps/rejected": -1.338127851486206, |
|
"loss": 2.0325, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -10.298274993896484, |
|
"rewards/margins": 3.0830020904541016, |
|
"rewards/rejected": -13.381277084350586, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 99.47098531695673, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.0900102853775024, |
|
"logits/rejected": -1.0740206241607666, |
|
"logps/chosen": -1.147585153579712, |
|
"logps/rejected": -1.567286729812622, |
|
"loss": 1.97, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -11.475851058959961, |
|
"rewards/margins": 4.197016716003418, |
|
"rewards/rejected": -15.672868728637695, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 63.97740340024748, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.114548921585083, |
|
"logits/rejected": -1.0925266742706299, |
|
"logps/chosen": -1.1195895671844482, |
|
"logps/rejected": -1.5141029357910156, |
|
"loss": 1.8483, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.195895195007324, |
|
"rewards/margins": 3.9451332092285156, |
|
"rewards/rejected": -15.141029357910156, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 96.40902503969279, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.1151658296585083, |
|
"logits/rejected": -1.0936176776885986, |
|
"logps/chosen": -1.0980020761489868, |
|
"logps/rejected": -1.5130436420440674, |
|
"loss": 1.8009, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -10.980019569396973, |
|
"rewards/margins": 4.150416374206543, |
|
"rewards/rejected": -15.1304349899292, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 88.267242124785, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.1430907249450684, |
|
"logits/rejected": -1.0969905853271484, |
|
"logps/chosen": -1.1937079429626465, |
|
"logps/rejected": -1.4644215106964111, |
|
"loss": 1.6648, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.937080383300781, |
|
"rewards/margins": 2.7071356773376465, |
|
"rewards/rejected": -14.644216537475586, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 97.14634805094629, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.100259780883789, |
|
"logits/rejected": -1.0826084613800049, |
|
"logps/chosen": -1.1161882877349854, |
|
"logps/rejected": -1.483745813369751, |
|
"loss": 1.6508, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -11.161882400512695, |
|
"rewards/margins": 3.6755752563476562, |
|
"rewards/rejected": -14.837457656860352, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 99.49435944644794, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.1444669961929321, |
|
"logits/rejected": -1.0918629169464111, |
|
"logps/chosen": -1.1615946292877197, |
|
"logps/rejected": -1.5793178081512451, |
|
"loss": 1.7334, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -11.615945816040039, |
|
"rewards/margins": 4.177231788635254, |
|
"rewards/rejected": -15.793177604675293, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 90.47582983297573, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.1339255571365356, |
|
"logits/rejected": -1.143362283706665, |
|
"logps/chosen": -1.2574008703231812, |
|
"logps/rejected": -1.757472276687622, |
|
"loss": 1.565, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -12.574009895324707, |
|
"rewards/margins": 5.000711917877197, |
|
"rewards/rejected": -17.57472038269043, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 69.29879834232428, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -1.0910736322402954, |
|
"logits/rejected": -1.073672890663147, |
|
"logps/chosen": -1.2707961797714233, |
|
"logps/rejected": -1.6355140209197998, |
|
"loss": 1.6663, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -12.707962036132812, |
|
"rewards/margins": 3.6471798419952393, |
|
"rewards/rejected": -16.355140686035156, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 114.06338305078152, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -1.0798423290252686, |
|
"logits/rejected": -1.0704095363616943, |
|
"logps/chosen": -1.2981719970703125, |
|
"logps/rejected": -1.7119674682617188, |
|
"loss": 1.5558, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -12.981719970703125, |
|
"rewards/margins": 4.137955665588379, |
|
"rewards/rejected": -17.11967658996582, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 90.04523554097818, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.1306648254394531, |
|
"logits/rejected": -1.082484483718872, |
|
"logps/chosen": -1.3895291090011597, |
|
"logps/rejected": -1.8172976970672607, |
|
"loss": 1.7407, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -13.895291328430176, |
|
"rewards/margins": 4.277683734893799, |
|
"rewards/rejected": -18.172977447509766, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 90.2095455226181, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.156951904296875, |
|
"logits/rejected": -1.1476523876190186, |
|
"logps/chosen": -1.405803918838501, |
|
"logps/rejected": -1.8274023532867432, |
|
"loss": 1.6338, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -14.058039665222168, |
|
"rewards/margins": 4.215982913970947, |
|
"rewards/rejected": -18.274023056030273, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 121.41353956332256, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -1.098239541053772, |
|
"logits/rejected": -1.082698941230774, |
|
"logps/chosen": -1.419948697090149, |
|
"logps/rejected": -1.8909088373184204, |
|
"loss": 1.4946, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -14.199487686157227, |
|
"rewards/margins": 4.709600925445557, |
|
"rewards/rejected": -18.909086227416992, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 101.41780500317314, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.1594250202178955, |
|
"logits/rejected": -1.142090082168579, |
|
"logps/chosen": -1.455776333808899, |
|
"logps/rejected": -1.9567766189575195, |
|
"loss": 1.4418, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.557762145996094, |
|
"rewards/margins": 5.01000452041626, |
|
"rewards/rejected": -19.567766189575195, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 75.55567519433296, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.139162302017212, |
|
"logits/rejected": -1.1079540252685547, |
|
"logps/chosen": -1.5027626752853394, |
|
"logps/rejected": -1.9599437713623047, |
|
"loss": 1.4648, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -15.027628898620605, |
|
"rewards/margins": 4.571808815002441, |
|
"rewards/rejected": -19.599437713623047, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 74.26739925465552, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.2207891941070557, |
|
"logits/rejected": -1.1718138456344604, |
|
"logps/chosen": -1.4501031637191772, |
|
"logps/rejected": -1.89298415184021, |
|
"loss": 1.4109, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -14.501032829284668, |
|
"rewards/margins": 4.428809642791748, |
|
"rewards/rejected": -18.929840087890625, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 94.05649604378364, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.247056007385254, |
|
"logits/rejected": -1.2196362018585205, |
|
"logps/chosen": -1.4864906072616577, |
|
"logps/rejected": -1.998369812965393, |
|
"loss": 1.3624, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -14.864908218383789, |
|
"rewards/margins": 5.1187896728515625, |
|
"rewards/rejected": -19.98369789123535, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 83.1982843702179, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.219469428062439, |
|
"logits/rejected": -1.2216647863388062, |
|
"logps/chosen": -1.4312059879302979, |
|
"logps/rejected": -1.9184064865112305, |
|
"loss": 1.4426, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -14.31205940246582, |
|
"rewards/margins": 4.872005462646484, |
|
"rewards/rejected": -19.184062957763672, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 91.50503743289462, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.3046424388885498, |
|
"logits/rejected": -1.2480084896087646, |
|
"logps/chosen": -1.4915251731872559, |
|
"logps/rejected": -2.1005947589874268, |
|
"loss": 1.3463, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -14.915250778198242, |
|
"rewards/margins": 6.090696334838867, |
|
"rewards/rejected": -21.00594711303711, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 93.25169357476504, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.2517807483673096, |
|
"logits/rejected": -1.2366987466812134, |
|
"logps/chosen": -1.5615525245666504, |
|
"logps/rejected": -2.1383166313171387, |
|
"loss": 1.3433, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.61552619934082, |
|
"rewards/margins": 5.767637729644775, |
|
"rewards/rejected": -21.383163452148438, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 84.64245243867487, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.3026002645492554, |
|
"logits/rejected": -1.279266595840454, |
|
"logps/chosen": -1.6704552173614502, |
|
"logps/rejected": -2.1482787132263184, |
|
"loss": 1.4096, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.70454978942871, |
|
"rewards/margins": 4.778237342834473, |
|
"rewards/rejected": -21.4827880859375, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 95.87257492049523, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.2724745273590088, |
|
"logits/rejected": -1.2435853481292725, |
|
"logps/chosen": -1.5778831243515015, |
|
"logps/rejected": -2.0692591667175293, |
|
"loss": 1.4179, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.778831481933594, |
|
"rewards/margins": 4.913762092590332, |
|
"rewards/rejected": -20.69259262084961, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 75.03721390019906, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.239183783531189, |
|
"logits/rejected": -1.222015619277954, |
|
"logps/chosen": -1.6615720987319946, |
|
"logps/rejected": -2.201857566833496, |
|
"loss": 1.3249, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.615720748901367, |
|
"rewards/margins": 5.402855396270752, |
|
"rewards/rejected": -22.018573760986328, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 164.9017663104039, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.235475778579712, |
|
"logits/rejected": -1.2083224058151245, |
|
"logps/chosen": -1.4797016382217407, |
|
"logps/rejected": -2.020514965057373, |
|
"loss": 1.2676, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -14.797017097473145, |
|
"rewards/margins": 5.408134460449219, |
|
"rewards/rejected": -20.205150604248047, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 81.15228217715286, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.2022705078125, |
|
"logits/rejected": -1.1674965620040894, |
|
"logps/chosen": -1.4664252996444702, |
|
"logps/rejected": -1.931023359298706, |
|
"loss": 1.4248, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -14.664253234863281, |
|
"rewards/margins": 4.645982265472412, |
|
"rewards/rejected": -19.31023597717285, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 104.47138977370568, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.28118097782135, |
|
"logits/rejected": -1.2569917440414429, |
|
"logps/chosen": -1.5222523212432861, |
|
"logps/rejected": -2.0608773231506348, |
|
"loss": 1.3366, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -15.22252368927002, |
|
"rewards/margins": 5.386250972747803, |
|
"rewards/rejected": -20.608774185180664, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 93.20515844216109, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.251189947128296, |
|
"logits/rejected": -1.2210452556610107, |
|
"logps/chosen": -1.5165798664093018, |
|
"logps/rejected": -2.1089890003204346, |
|
"loss": 1.0744, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -15.165797233581543, |
|
"rewards/margins": 5.924090385437012, |
|
"rewards/rejected": -21.089889526367188, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 81.40609622602612, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.2748587131500244, |
|
"logits/rejected": -1.2314293384552002, |
|
"logps/chosen": -1.5472508668899536, |
|
"logps/rejected": -2.0469508171081543, |
|
"loss": 1.3477, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.472508430480957, |
|
"rewards/margins": 4.996997356414795, |
|
"rewards/rejected": -20.469507217407227, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 99.64164292903165, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.2144112586975098, |
|
"logits/rejected": -1.2017982006072998, |
|
"logps/chosen": -1.56182062625885, |
|
"logps/rejected": -2.0901787281036377, |
|
"loss": 1.1901, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -15.618207931518555, |
|
"rewards/margins": 5.283578872680664, |
|
"rewards/rejected": -20.901784896850586, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 88.30403958670574, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.1442140340805054, |
|
"logits/rejected": -1.1183984279632568, |
|
"logps/chosen": -1.6569198369979858, |
|
"logps/rejected": -2.119126558303833, |
|
"loss": 1.5134, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -16.569198608398438, |
|
"rewards/margins": 4.622069358825684, |
|
"rewards/rejected": -21.191268920898438, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 76.57630583970531, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.2529053688049316, |
|
"logits/rejected": -1.2107479572296143, |
|
"logps/chosen": -1.6253858804702759, |
|
"logps/rejected": -2.088477849960327, |
|
"loss": 1.3079, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.253854751586914, |
|
"rewards/margins": 4.630920886993408, |
|
"rewards/rejected": -20.884777069091797, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 115.60549772909806, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.2966339588165283, |
|
"logits/rejected": -1.2777100801467896, |
|
"logps/chosen": -1.6359281539916992, |
|
"logps/rejected": -2.1718597412109375, |
|
"loss": 1.325, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.35927963256836, |
|
"rewards/margins": 5.359316825866699, |
|
"rewards/rejected": -21.71860122680664, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 88.88632603422757, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.2804549932479858, |
|
"logits/rejected": -1.2265164852142334, |
|
"logps/chosen": -1.6381988525390625, |
|
"logps/rejected": -2.162752628326416, |
|
"loss": 1.3169, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.381986618041992, |
|
"rewards/margins": 5.245541572570801, |
|
"rewards/rejected": -21.62752914428711, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 87.7153445266354, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.2721607685089111, |
|
"logits/rejected": -1.268053650856018, |
|
"logps/chosen": -1.6763546466827393, |
|
"logps/rejected": -2.327500820159912, |
|
"loss": 1.233, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -16.763545989990234, |
|
"rewards/margins": 6.511463165283203, |
|
"rewards/rejected": -23.275009155273438, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 70.42378282195334, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.3053241968154907, |
|
"logits/rejected": -1.2554945945739746, |
|
"logps/chosen": -1.7189693450927734, |
|
"logps/rejected": -2.395566463470459, |
|
"loss": 1.3248, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -17.189693450927734, |
|
"rewards/margins": 6.765972137451172, |
|
"rewards/rejected": -23.95566749572754, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 73.22054751663588, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.2106170654296875, |
|
"logits/rejected": -1.1687678098678589, |
|
"logps/chosen": -1.6829122304916382, |
|
"logps/rejected": -2.164522647857666, |
|
"loss": 1.3184, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.829120635986328, |
|
"rewards/margins": 4.816105842590332, |
|
"rewards/rejected": -21.64522933959961, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 97.2368838538889, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.2514355182647705, |
|
"logits/rejected": -1.2357397079467773, |
|
"logps/chosen": -1.652038335800171, |
|
"logps/rejected": -2.202007532119751, |
|
"loss": 1.1712, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.520383834838867, |
|
"rewards/margins": 5.499691009521484, |
|
"rewards/rejected": -22.02007484436035, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 101.12187857047496, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.2703526020050049, |
|
"logits/rejected": -1.237259864807129, |
|
"logps/chosen": -1.5824345350265503, |
|
"logps/rejected": -2.1380550861358643, |
|
"loss": 1.275, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.824345588684082, |
|
"rewards/margins": 5.556204795837402, |
|
"rewards/rejected": -21.380550384521484, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 83.8607340097021, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.3083045482635498, |
|
"logits/rejected": -1.2853591442108154, |
|
"logps/chosen": -1.6669658422470093, |
|
"logps/rejected": -2.3121345043182373, |
|
"loss": 1.3494, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.669658660888672, |
|
"rewards/margins": 6.45168924331665, |
|
"rewards/rejected": -23.121349334716797, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 108.91802839900684, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.2830331325531006, |
|
"logits/rejected": -1.2870824337005615, |
|
"logps/chosen": -1.5124456882476807, |
|
"logps/rejected": -2.100379467010498, |
|
"loss": 1.2289, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.124456405639648, |
|
"rewards/margins": 5.879337310791016, |
|
"rewards/rejected": -21.003795623779297, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 92.29342846486482, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.1956264972686768, |
|
"logits/rejected": -1.2103073596954346, |
|
"logps/chosen": -1.5397236347198486, |
|
"logps/rejected": -2.1057169437408447, |
|
"loss": 1.1804, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -15.397236824035645, |
|
"rewards/margins": 5.659933090209961, |
|
"rewards/rejected": -21.05716896057129, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 80.96407512590481, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.2520828247070312, |
|
"logits/rejected": -1.252455472946167, |
|
"logps/chosen": -1.548658847808838, |
|
"logps/rejected": -2.2058348655700684, |
|
"loss": 1.2696, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.486590385437012, |
|
"rewards/margins": 6.571758270263672, |
|
"rewards/rejected": -22.058345794677734, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 80.42780807489935, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.2946264743804932, |
|
"logits/rejected": -1.2320127487182617, |
|
"logps/chosen": -1.6179031133651733, |
|
"logps/rejected": -2.2405967712402344, |
|
"loss": 1.2432, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.179031372070312, |
|
"rewards/margins": 6.226935863494873, |
|
"rewards/rejected": -22.405969619750977, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 101.37315779363591, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.2141435146331787, |
|
"logits/rejected": -1.20265793800354, |
|
"logps/chosen": -1.5797102451324463, |
|
"logps/rejected": -2.090893507003784, |
|
"loss": 1.2071, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.797101974487305, |
|
"rewards/margins": 5.111833095550537, |
|
"rewards/rejected": -20.908937454223633, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 77.11466865502938, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.2475601434707642, |
|
"logits/rejected": -1.2534716129302979, |
|
"logps/chosen": -1.711700439453125, |
|
"logps/rejected": -2.310718536376953, |
|
"loss": 1.2262, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.11700439453125, |
|
"rewards/margins": 5.990179538726807, |
|
"rewards/rejected": -23.1071834564209, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 117.37788053060973, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.2747116088867188, |
|
"logits/rejected": -1.255689024925232, |
|
"logps/chosen": -1.5994197130203247, |
|
"logps/rejected": -2.137598752975464, |
|
"loss": 1.4027, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.994195938110352, |
|
"rewards/margins": 5.381791114807129, |
|
"rewards/rejected": -21.375986099243164, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 78.98597874286344, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.2616714239120483, |
|
"logits/rejected": -1.2391117811203003, |
|
"logps/chosen": -1.6102495193481445, |
|
"logps/rejected": -2.3222262859344482, |
|
"loss": 1.0892, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.102497100830078, |
|
"rewards/margins": 7.1197686195373535, |
|
"rewards/rejected": -23.222265243530273, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 84.54508830914433, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.2698580026626587, |
|
"logits/rejected": -1.2559095621109009, |
|
"logps/chosen": -1.6754401922225952, |
|
"logps/rejected": -2.296884298324585, |
|
"loss": 1.2113, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.75440216064453, |
|
"rewards/margins": 6.214441299438477, |
|
"rewards/rejected": -22.96884536743164, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 80.87157072797021, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.271916389465332, |
|
"logits/rejected": -1.252569317817688, |
|
"logps/chosen": -1.5791504383087158, |
|
"logps/rejected": -2.1556344032287598, |
|
"loss": 1.26, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.79150390625, |
|
"rewards/margins": 5.764839172363281, |
|
"rewards/rejected": -21.55634117126465, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 65.17427856770865, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.2736351490020752, |
|
"logits/rejected": -1.2584983110427856, |
|
"logps/chosen": -1.6285731792449951, |
|
"logps/rejected": -2.164571762084961, |
|
"loss": 1.1724, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.28573226928711, |
|
"rewards/margins": 5.359988212585449, |
|
"rewards/rejected": -21.645719528198242, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.460854172706604, |
|
"eval_logits/rejected": -1.470569372177124, |
|
"eval_logps/chosen": -1.6253507137298584, |
|
"eval_logps/rejected": -2.183858871459961, |
|
"eval_loss": 1.351211428642273, |
|
"eval_rewards/accuracies": 0.8394308686256409, |
|
"eval_rewards/chosen": -16.253507614135742, |
|
"eval_rewards/margins": 5.5850830078125, |
|
"eval_rewards/rejected": -21.838590621948242, |
|
"eval_runtime": 98.0395, |
|
"eval_samples_per_second": 20.002, |
|
"eval_steps_per_second": 1.255, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 113.62350381683838, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.2400482892990112, |
|
"logits/rejected": -1.2593395709991455, |
|
"logps/chosen": -1.7036975622177124, |
|
"logps/rejected": -2.276524066925049, |
|
"loss": 1.1972, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -17.036977767944336, |
|
"rewards/margins": 5.728262901306152, |
|
"rewards/rejected": -22.765239715576172, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 144.61743799041326, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.2617685794830322, |
|
"logits/rejected": -1.249569296836853, |
|
"logps/chosen": -1.6581430435180664, |
|
"logps/rejected": -2.1955461502075195, |
|
"loss": 1.3257, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.581430435180664, |
|
"rewards/margins": 5.374030113220215, |
|
"rewards/rejected": -21.955459594726562, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 88.89080057583915, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.2503310441970825, |
|
"logits/rejected": -1.1999974250793457, |
|
"logps/chosen": -1.5886937379837036, |
|
"logps/rejected": -2.2466578483581543, |
|
"loss": 1.3331, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.886937141418457, |
|
"rewards/margins": 6.579639434814453, |
|
"rewards/rejected": -22.466577529907227, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 85.80208344095527, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.2792952060699463, |
|
"logits/rejected": -1.2328004837036133, |
|
"logps/chosen": -1.524823546409607, |
|
"logps/rejected": -2.1325583457946777, |
|
"loss": 1.1978, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.248235702514648, |
|
"rewards/margins": 6.077347755432129, |
|
"rewards/rejected": -21.325584411621094, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 107.0115137921273, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.2821718454360962, |
|
"logits/rejected": -1.273240089416504, |
|
"logps/chosen": -1.7154676914215088, |
|
"logps/rejected": -2.2665412425994873, |
|
"loss": 1.4189, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -17.154674530029297, |
|
"rewards/margins": 5.510737419128418, |
|
"rewards/rejected": -22.6654109954834, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 106.31448672736177, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.267595887184143, |
|
"logits/rejected": -1.2530834674835205, |
|
"logps/chosen": -1.7077808380126953, |
|
"logps/rejected": -2.310647487640381, |
|
"loss": 1.253, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -17.077808380126953, |
|
"rewards/margins": 6.028668403625488, |
|
"rewards/rejected": -23.106477737426758, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 82.83256823235773, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.2165040969848633, |
|
"logits/rejected": -1.180338740348816, |
|
"logps/chosen": -1.5563018321990967, |
|
"logps/rejected": -2.143784284591675, |
|
"loss": 1.1374, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -15.563015937805176, |
|
"rewards/margins": 5.874825477600098, |
|
"rewards/rejected": -21.437841415405273, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 83.84639598697997, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.2405247688293457, |
|
"logits/rejected": -1.1848217248916626, |
|
"logps/chosen": -1.6584663391113281, |
|
"logps/rejected": -2.197697401046753, |
|
"loss": 1.2363, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.58466148376465, |
|
"rewards/margins": 5.392308235168457, |
|
"rewards/rejected": -21.976970672607422, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 80.97147542055019, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.1893346309661865, |
|
"logits/rejected": -1.1745051145553589, |
|
"logps/chosen": -1.6517200469970703, |
|
"logps/rejected": -2.3159308433532715, |
|
"loss": 1.0816, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.517200469970703, |
|
"rewards/margins": 6.642111301422119, |
|
"rewards/rejected": -23.159311294555664, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 101.25047268904304, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.2991673946380615, |
|
"logits/rejected": -1.2805778980255127, |
|
"logps/chosen": -1.6150133609771729, |
|
"logps/rejected": -2.1743900775909424, |
|
"loss": 1.3429, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -16.150135040283203, |
|
"rewards/margins": 5.593768119812012, |
|
"rewards/rejected": -21.7439022064209, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 104.9392779126059, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.2574083805084229, |
|
"logits/rejected": -1.22499680519104, |
|
"logps/chosen": -1.6232726573944092, |
|
"logps/rejected": -2.2851340770721436, |
|
"loss": 1.0189, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -16.232728958129883, |
|
"rewards/margins": 6.618613243103027, |
|
"rewards/rejected": -22.851343154907227, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 89.08039811332223, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.2663607597351074, |
|
"logits/rejected": -1.2550715208053589, |
|
"logps/chosen": -1.699883222579956, |
|
"logps/rejected": -2.3202741146087646, |
|
"loss": 1.1666, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.99883460998535, |
|
"rewards/margins": 6.203906536102295, |
|
"rewards/rejected": -23.202741622924805, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 104.72078285232733, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.2424919605255127, |
|
"logits/rejected": -1.251131296157837, |
|
"logps/chosen": -1.704716682434082, |
|
"logps/rejected": -2.33524489402771, |
|
"loss": 1.3005, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -17.047168731689453, |
|
"rewards/margins": 6.305283546447754, |
|
"rewards/rejected": -23.352449417114258, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.7901343150618756, |
|
"train_runtime": 11462.4072, |
|
"train_samples_per_second": 5.224, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|