{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.985781990521327, "eval_steps": 500, "global_step": 315, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04739336492890995, "grad_norm": 168960.0, "learning_rate": 9.375000000000001e-06, "log_odds_chosen": 10.756143569946289, "log_odds_ratio": -8.201075553894043, "logits/chosen": 137.92239379882812, "logits/rejected": 154.74513244628906, "logps/chosen": -16.90255355834961, "logps/rejected": -27.65829849243164, "loss": 1407.4271, "nll_loss": 8.030233383178711, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -8.451276779174805, "rewards/margins": 5.377873420715332, "rewards/rejected": -13.82914924621582, "step": 5 }, { "epoch": 0.0947867298578199, "grad_norm": 8320.0, "learning_rate": 1.8750000000000002e-05, "log_odds_chosen": -2.0146212577819824, "log_odds_ratio": -9.117185592651367, "logits/chosen": 139.0816192626953, "logits/rejected": 152.4964141845703, "logps/chosen": -16.836305618286133, "logps/rejected": -14.821057319641113, "loss": 114.8602, "nll_loss": 6.842395782470703, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -8.418152809143066, "rewards/margins": -1.0076239109039307, "rewards/rejected": -7.410528659820557, "step": 10 }, { "epoch": 0.14218009478672985, "grad_norm": 25344.0, "learning_rate": 2.8125e-05, "log_odds_chosen": 6.341404914855957, "log_odds_ratio": -7.492286682128906, "logits/chosen": 115.94664001464844, "logits/rejected": 139.38864135742188, "logps/chosen": -19.506237030029297, "logps/rejected": -25.845422744750977, "loss": 2035.5172, "nll_loss": 8.318296432495117, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -9.753118515014648, "rewards/margins": 3.1695926189422607, "rewards/rejected": -12.922711372375488, "step": 15 }, { "epoch": 0.1895734597156398, "grad_norm": 3424.0, "learning_rate": 3.7500000000000003e-05, "log_odds_chosen": -0.7289161682128906, "log_odds_ratio": -7.691019535064697, "logits/chosen": 105.5518569946289, "logits/rejected": 116.6485595703125, "logps/chosen": -18.734756469726562, "logps/rejected": -18.00742530822754, "loss": 1202.2451, "nll_loss": 8.924882888793945, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -9.367378234863281, "rewards/margins": -0.3636666238307953, "rewards/rejected": -9.00371265411377, "step": 20 }, { "epoch": 0.23696682464454977, "grad_norm": 8640.0, "learning_rate": 4.6875e-05, "log_odds_chosen": -1.369574785232544, "log_odds_ratio": -8.745997428894043, "logits/chosen": 102.84317779541016, "logits/rejected": 96.14292907714844, "logps/chosen": -18.200498580932617, "logps/rejected": -16.830005645751953, "loss": 1457.908, "nll_loss": 7.214259147644043, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -9.100249290466309, "rewards/margins": -0.685246467590332, "rewards/rejected": -8.415002822875977, "step": 25 }, { "epoch": 0.2843601895734597, "grad_norm": 20480.0, "learning_rate": 5.625e-05, "log_odds_chosen": -2.44787859916687, "log_odds_ratio": -9.492902755737305, "logits/chosen": 82.8386459350586, "logits/rejected": 124.99415588378906, "logps/chosen": -19.397884368896484, "logps/rejected": -16.947490692138672, "loss": 416.4873, "nll_loss": 7.92099666595459, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -9.698942184448242, "rewards/margins": -1.2251958847045898, "rewards/rejected": -8.473745346069336, "step": 30 }, { "epoch": 0.33175355450236965, "grad_norm": 872.0, "learning_rate": 5.998336508818541e-05, "log_odds_chosen": -3.3048667907714844, "log_odds_ratio": -4.905556678771973, "logits/chosen": 179.0537567138672, "logits/rejected": 157.49603271484375, "logps/chosen": -10.089513778686523, "logps/rejected": -6.791520595550537, "loss": 183.6225, "nll_loss": 5.170867443084717, "rewards/accuracies": 0.4375, "rewards/chosen": -5.044756889343262, "rewards/margins": -1.6489967107772827, "rewards/rejected": -3.3957602977752686, "step": 35 }, { "epoch": 0.3791469194312796, "grad_norm": 388.0, "learning_rate": 5.988177409372154e-05, "log_odds_chosen": 0.18621893227100372, "log_odds_ratio": -0.744963526725769, "logits/chosen": 250.6142120361328, "logits/rejected": 266.593994140625, "logps/chosen": -1.9268105030059814, "logps/rejected": -2.0770750045776367, "loss": 45.8455, "nll_loss": 2.393054962158203, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.9634052515029907, "rewards/margins": 0.07513223588466644, "rewards/rejected": -1.0385375022888184, "step": 40 }, { "epoch": 0.4265402843601896, "grad_norm": 296.0, "learning_rate": 5.968814624645376e-05, "log_odds_chosen": 0.19682058691978455, "log_odds_ratio": -0.9012538194656372, "logits/chosen": 257.0416259765625, "logits/rejected": 231.23562622070312, "logps/chosen": -1.8941532373428345, "logps/rejected": -2.064608097076416, "loss": 36.973, "nll_loss": 2.027169704437256, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.9470766186714172, "rewards/margins": 0.08522741496562958, "rewards/rejected": -1.032304048538208, "step": 45 }, { "epoch": 0.47393364928909953, "grad_norm": 65.5, "learning_rate": 5.9403077926557534e-05, "log_odds_chosen": 0.11038754880428314, "log_odds_ratio": -0.8040679693222046, "logits/chosen": 263.97332763671875, "logits/rejected": 269.75946044921875, "logps/chosen": -1.6346588134765625, "logps/rejected": -1.7171961069107056, "loss": 35.7226, "nll_loss": 1.9796053171157837, "rewards/accuracies": 0.5, "rewards/chosen": -0.8173294067382812, "rewards/margins": 0.04126860201358795, "rewards/rejected": -0.8585980534553528, "step": 50 }, { "epoch": 0.5213270142180095, "grad_norm": 50.75, "learning_rate": 5.9027447153889215e-05, "log_odds_chosen": 0.09287216514348984, "log_odds_ratio": -0.7353734374046326, "logits/chosen": 243.19577026367188, "logits/rejected": 245.62234497070312, "logps/chosen": -1.287972092628479, "logps/rejected": -1.3607467412948608, "loss": 30.7576, "nll_loss": 1.6278873682022095, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.6439860463142395, "rewards/margins": 0.03638739511370659, "rewards/rejected": -0.6803733706474304, "step": 55 }, { "epoch": 0.5687203791469194, "grad_norm": 37.25, "learning_rate": 5.856241088365584e-05, "log_odds_chosen": 0.1018252968788147, "log_odds_ratio": -0.7410688400268555, "logits/chosen": 220.8896026611328, "logits/rejected": 226.8162841796875, "logps/chosen": -1.215947151184082, "logps/rejected": -1.2763280868530273, "loss": 28.7756, "nll_loss": 1.4690172672271729, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.607973575592041, "rewards/margins": 0.030190488323569298, "rewards/rejected": -0.6381640434265137, "step": 60 }, { "epoch": 0.6161137440758294, "grad_norm": 94.0, "learning_rate": 5.800940144295476e-05, "log_odds_chosen": 0.2972797155380249, "log_odds_ratio": -0.6765426993370056, "logits/chosen": 227.9703369140625, "logits/rejected": 230.8743438720703, "logps/chosen": -1.2523950338363647, "logps/rejected": -1.4669511318206787, "loss": 28.0376, "nll_loss": 1.5335967540740967, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.6261975169181824, "rewards/margins": 0.10727809369564056, "rewards/rejected": -0.7334755659103394, "step": 65 }, { "epoch": 0.6635071090047393, "grad_norm": 80.5, "learning_rate": 5.7370122119158855e-05, "log_odds_chosen": 0.3697313070297241, "log_odds_ratio": -0.656975507736206, "logits/chosen": 229.30117797851562, "logits/rejected": 225.6231689453125, "logps/chosen": -1.0724413394927979, "logps/rejected": -1.3780503273010254, "loss": 27.2101, "nll_loss": 1.4345372915267944, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5362206697463989, "rewards/margins": 0.15280446410179138, "rewards/rejected": -0.6890251636505127, "step": 70 }, { "epoch": 0.7109004739336493, "grad_norm": 66.0, "learning_rate": 5.6646541913735056e-05, "log_odds_chosen": 0.23539912700653076, "log_odds_ratio": -0.6753562092781067, "logits/chosen": 222.90090942382812, "logits/rejected": 224.7488555908203, "logps/chosen": -1.040056586265564, "logps/rejected": -1.2055822610855103, "loss": 26.432, "nll_loss": 1.4434144496917725, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.520028293132782, "rewards/margins": 0.08276289701461792, "rewards/rejected": -0.6027911305427551, "step": 75 }, { "epoch": 0.7582938388625592, "grad_norm": 57.0, "learning_rate": 5.5840889477654665e-05, "log_odds_chosen": 0.21213491261005402, "log_odds_ratio": -0.7092779874801636, "logits/chosen": 224.779052734375, "logits/rejected": 223.54104614257812, "logps/chosen": -1.135852575302124, "logps/rejected": -1.2810156345367432, "loss": 25.729, "nll_loss": 1.3958572149276733, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.567926287651062, "rewards/margins": 0.07258154451847076, "rewards/rejected": -0.6405078172683716, "step": 80 }, { "epoch": 0.8056872037914692, "grad_norm": 75.5, "learning_rate": 5.495564624707466e-05, "log_odds_chosen": 0.22340472042560577, "log_odds_ratio": -0.6968339085578918, "logits/chosen": 214.7851104736328, "logits/rejected": 210.5839080810547, "logps/chosen": -1.09432852268219, "logps/rejected": -1.2580267190933228, "loss": 25.5057, "nll_loss": 1.3930976390838623, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.547164261341095, "rewards/margins": 0.08184906840324402, "rewards/rejected": -0.6290133595466614, "step": 85 }, { "epoch": 0.8530805687203792, "grad_norm": 57.25, "learning_rate": 5.399353880043222e-05, "log_odds_chosen": 0.258540540933609, "log_odds_ratio": -0.6605676412582397, "logits/chosen": 212.9822540283203, "logits/rejected": 210.5906219482422, "logps/chosen": -1.0728873014450073, "logps/rejected": -1.2394678592681885, "loss": 27.487, "nll_loss": 1.4236419200897217, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.5364436507225037, "rewards/margins": 0.0832902193069458, "rewards/rejected": -0.6197339296340942, "step": 90 }, { "epoch": 0.9004739336492891, "grad_norm": 44.0, "learning_rate": 5.295753046049293e-05, "log_odds_chosen": 0.33555328845977783, "log_odds_ratio": -0.599485456943512, "logits/chosen": 199.41171264648438, "logits/rejected": 198.87872314453125, "logps/chosen": -0.9943248629570007, "logps/rejected": -1.2264639139175415, "loss": 25.259, "nll_loss": 1.2300159931182861, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.49716243147850037, "rewards/margins": 0.11606951057910919, "rewards/rejected": -0.6132319569587708, "step": 95 }, { "epoch": 0.9478672985781991, "grad_norm": 28.0, "learning_rate": 5.1850812167218644e-05, "log_odds_chosen": 0.05684388801455498, "log_odds_ratio": -0.7587562799453735, "logits/chosen": 202.80810546875, "logits/rejected": 196.2851104736328, "logps/chosen": -1.1174707412719727, "logps/rejected": -1.1865062713623047, "loss": 25.2803, "nll_loss": 1.4526774883270264, "rewards/accuracies": 0.5, "rewards/chosen": -0.5587353706359863, "rewards/margins": 0.03451773524284363, "rewards/rejected": -0.5932531356811523, "step": 100 }, { "epoch": 0.995260663507109, "grad_norm": 40.75, "learning_rate": 5.067679264956681e-05, "log_odds_chosen": 0.40639758110046387, "log_odds_ratio": -0.6050174236297607, "logits/chosen": 204.6847381591797, "logits/rejected": 201.60601806640625, "logps/chosen": -1.0167808532714844, "logps/rejected": -1.3128955364227295, "loss": 24.7542, "nll_loss": 1.3192346096038818, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5083904266357422, "rewards/margins": 0.14805743098258972, "rewards/rejected": -0.6564477682113647, "step": 105 }, { "epoch": 1.042654028436019, "grad_norm": 23.375, "learning_rate": 4.943908792649255e-05, "log_odds_chosen": 0.21281662583351135, "log_odds_ratio": -0.6757606267929077, "logits/chosen": 198.5106201171875, "logits/rejected": 196.48208618164062, "logps/chosen": -0.9237734079360962, "logps/rejected": -1.0613982677459717, "loss": 22.539, "nll_loss": 1.1923763751983643, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4618867039680481, "rewards/margins": 0.06881250441074371, "rewards/rejected": -0.5306991338729858, "step": 110 }, { "epoch": 1.0900473933649288, "grad_norm": 51.25, "learning_rate": 4.814151016949061e-05, "log_odds_chosen": 0.45136967301368713, "log_odds_ratio": -0.574053168296814, "logits/chosen": 194.5438232421875, "logits/rejected": 195.3663787841797, "logps/chosen": -0.8666488528251648, "logps/rejected": -1.1445974111557007, "loss": 22.1373, "nll_loss": 1.1306638717651367, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4333244264125824, "rewards/margins": 0.13897429406642914, "rewards/rejected": -0.5722987055778503, "step": 115 }, { "epoch": 1.1374407582938388, "grad_norm": 32.0, "learning_rate": 4.6788055960981e-05, "log_odds_chosen": 0.5978150367736816, "log_odds_ratio": -0.5191441774368286, "logits/chosen": 195.80685424804688, "logits/rejected": 192.3872528076172, "logps/chosen": -0.816036581993103, "logps/rejected": -1.1533689498901367, "loss": 21.9447, "nll_loss": 1.137957215309143, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.4080182909965515, "rewards/margins": 0.16866618394851685, "rewards/rejected": -0.5766844749450684, "step": 120 }, { "epoch": 1.1848341232227488, "grad_norm": 21.0, "learning_rate": 4.538289398470304e-05, "log_odds_chosen": 0.44998010993003845, "log_odds_ratio": -0.5995658040046692, "logits/chosen": 196.32797241210938, "logits/rejected": 197.1869659423828, "logps/chosen": -0.9477843046188354, "logps/rejected": -1.2593724727630615, "loss": 21.1467, "nll_loss": 1.1062265634536743, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4738921523094177, "rewards/margins": 0.15579405426979065, "rewards/rejected": -0.6296862363815308, "step": 125 }, { "epoch": 1.2322274881516588, "grad_norm": 21.625, "learning_rate": 4.393035218603139e-05, "log_odds_chosen": 0.19958534836769104, "log_odds_ratio": -0.6756640672683716, "logits/chosen": 199.8778076171875, "logits/rejected": 195.39340209960938, "logps/chosen": -0.9140733480453491, "logps/rejected": -1.0152888298034668, "loss": 21.3142, "nll_loss": 1.2156976461410522, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.45703667402267456, "rewards/margins": 0.05060772970318794, "rewards/rejected": -0.5076444149017334, "step": 130 }, { "epoch": 1.2796208530805688, "grad_norm": 27.75, "learning_rate": 4.243490444176123e-05, "log_odds_chosen": 0.38076427578926086, "log_odds_ratio": -0.6123644113540649, "logits/chosen": 199.5050048828125, "logits/rejected": 198.98667907714844, "logps/chosen": -0.8708294630050659, "logps/rejected": -1.09108304977417, "loss": 21.4393, "nll_loss": 1.1632344722747803, "rewards/accuracies": 0.625, "rewards/chosen": -0.43541473150253296, "rewards/margins": 0.11012685298919678, "rewards/rejected": -0.545541524887085, "step": 135 }, { "epoch": 1.3270142180094786, "grad_norm": 30.75, "learning_rate": 4.090115678041962e-05, "log_odds_chosen": 0.45514464378356934, "log_odds_ratio": -0.6075628399848938, "logits/chosen": 194.0288543701172, "logits/rejected": 193.20309448242188, "logps/chosen": -0.8634368181228638, "logps/rejected": -1.140328288078308, "loss": 21.9818, "nll_loss": 1.1953437328338623, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4317184090614319, "rewards/margins": 0.13844572007656097, "rewards/rejected": -0.570164144039154, "step": 140 }, { "epoch": 1.3744075829383886, "grad_norm": 25.25, "learning_rate": 3.9333833195545325e-05, "log_odds_chosen": 0.3756052553653717, "log_odds_ratio": -0.5947796106338501, "logits/chosen": 198.31832885742188, "logits/rejected": 192.33865356445312, "logps/chosen": -0.9395328760147095, "logps/rejected": -1.2024142742156982, "loss": 21.7616, "nll_loss": 1.2738587856292725, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46976643800735474, "rewards/margins": 0.13144069910049438, "rewards/rejected": -0.6012071371078491, "step": 145 }, { "epoch": 1.4218009478672986, "grad_norm": 23.0, "learning_rate": 3.7737761095632374e-05, "log_odds_chosen": 0.3193782866001129, "log_odds_ratio": -0.6514483690261841, "logits/chosen": 196.14259338378906, "logits/rejected": 195.2425537109375, "logps/chosen": -0.8644716143608093, "logps/rejected": -1.049574851989746, "loss": 20.83, "nll_loss": 1.167014479637146, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.43223580718040466, "rewards/margins": 0.092551589012146, "rewards/rejected": -0.524787425994873, "step": 150 }, { "epoch": 1.4691943127962086, "grad_norm": 20.75, "learning_rate": 3.611785643555225e-05, "log_odds_chosen": 0.303898423910141, "log_odds_ratio": -0.648755669593811, "logits/chosen": 200.64492797851562, "logits/rejected": 200.30389404296875, "logps/chosen": -0.8748540878295898, "logps/rejected": -1.0394160747528076, "loss": 21.6333, "nll_loss": 1.1785424947738647, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4374270439147949, "rewards/margins": 0.08228104561567307, "rewards/rejected": -0.5197080373764038, "step": 155 }, { "epoch": 1.5165876777251186, "grad_norm": 21.875, "learning_rate": 3.44791085752502e-05, "log_odds_chosen": 0.31724172830581665, "log_odds_ratio": -0.622181236743927, "logits/chosen": 205.41311645507812, "logits/rejected": 208.6095733642578, "logps/chosen": -0.9168610572814941, "logps/rejected": -1.1023683547973633, "loss": 22.0025, "nll_loss": 1.2696937322616577, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.45843052864074707, "rewards/margins": 0.09275360405445099, "rewards/rejected": -0.5511841773986816, "step": 160 }, { "epoch": 1.5639810426540284, "grad_norm": 30.125, "learning_rate": 3.2826564912351544e-05, "log_odds_chosen": 0.2731252908706665, "log_odds_ratio": -0.6811183094978333, "logits/chosen": 204.3468017578125, "logits/rejected": 205.2547149658203, "logps/chosen": -1.0431245565414429, "logps/rejected": -1.184552550315857, "loss": 21.4814, "nll_loss": 1.184350609779358, "rewards/accuracies": 0.625, "rewards/chosen": -0.5215622782707214, "rewards/margins": 0.07071395963430405, "rewards/rejected": -0.5922762751579285, "step": 165 }, { "epoch": 1.6113744075829384, "grad_norm": 30.0, "learning_rate": 3.116531533601003e-05, "log_odds_chosen": 0.4361351430416107, "log_odds_ratio": -0.5953701138496399, "logits/chosen": 194.65945434570312, "logits/rejected": 192.39102172851562, "logps/chosen": -0.8711638450622559, "logps/rejected": -1.1436076164245605, "loss": 21.1767, "nll_loss": 1.1069728136062622, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.43558192253112793, "rewards/margins": 0.13622191548347473, "rewards/rejected": -0.5718038082122803, "step": 170 }, { "epoch": 1.6587677725118484, "grad_norm": 34.0, "learning_rate": 2.9500476549880848e-05, "log_odds_chosen": 0.3290528357028961, "log_odds_ratio": -0.6428475975990295, "logits/chosen": 200.77029418945312, "logits/rejected": 195.89601135253906, "logps/chosen": -0.8381175994873047, "logps/rejected": -1.0445606708526611, "loss": 20.8206, "nll_loss": 1.1465178728103638, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.41905879974365234, "rewards/margins": 0.10322149097919464, "rewards/rejected": -0.5222803354263306, "step": 175 }, { "epoch": 1.7061611374407581, "grad_norm": 28.0, "learning_rate": 2.7837176312504037e-05, "log_odds_chosen": 0.03685625642538071, "log_odds_ratio": -0.766934335231781, "logits/chosen": 198.74905395507812, "logits/rejected": 196.25143432617188, "logps/chosen": -0.9278505444526672, "logps/rejected": -0.9253548383712769, "loss": 21.729, "nll_loss": 1.2215286493301392, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.4639252722263336, "rewards/margins": -0.0012478366261348128, "rewards/rejected": -0.4626774191856384, "step": 180 }, { "epoch": 1.7535545023696684, "grad_norm": 18.0, "learning_rate": 2.618053764363861e-05, "log_odds_chosen": 0.3314729630947113, "log_odds_ratio": -0.6066881418228149, "logits/chosen": 201.25289916992188, "logits/rejected": 198.02322387695312, "logps/chosen": -0.8792837858200073, "logps/rejected": -1.0925233364105225, "loss": 21.2035, "nll_loss": 1.1164947748184204, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.43964189291000366, "rewards/margins": 0.10661973804235458, "rewards/rejected": -0.5462616682052612, "step": 185 }, { "epoch": 1.8009478672985781, "grad_norm": 38.25, "learning_rate": 2.453566304519216e-05, "log_odds_chosen": 0.4536499083042145, "log_odds_ratio": -0.5942190885543823, "logits/chosen": 203.0521697998047, "logits/rejected": 202.32650756835938, "logps/chosen": -0.9582914113998413, "logps/rejected": -1.2642791271209717, "loss": 21.7423, "nll_loss": 1.1580461263656616, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.47914570569992065, "rewards/margins": 0.1529938280582428, "rewards/rejected": -0.6321395635604858, "step": 190 }, { "epoch": 1.8483412322274881, "grad_norm": 22.375, "learning_rate": 2.29076187853462e-05, "log_odds_chosen": 0.4630239009857178, "log_odds_ratio": -0.5749759078025818, "logits/chosen": 196.7127685546875, "logits/rejected": 196.4191131591797, "logps/chosen": -0.8674151301383972, "logps/rejected": -1.1494576930999756, "loss": 20.9195, "nll_loss": 1.1604869365692139, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4337075650691986, "rewards/margins": 0.14102117717266083, "rewards/rejected": -0.5747288465499878, "step": 195 }, { "epoch": 1.8957345971563981, "grad_norm": 28.625, "learning_rate": 2.130141929428254e-05, "log_odds_chosen": 0.35148704051971436, "log_odds_ratio": -0.66729336977005, "logits/chosen": 197.56497192382812, "logits/rejected": 196.6879425048828, "logps/chosen": -0.8802660703659058, "logps/rejected": -1.102311134338379, "loss": 22.0774, "nll_loss": 1.1984275579452515, "rewards/accuracies": 0.625, "rewards/chosen": -0.4401330351829529, "rewards/margins": 0.11102245002985, "rewards/rejected": -0.5511555671691895, "step": 200 }, { "epoch": 1.943127962085308, "grad_norm": 25.375, "learning_rate": 1.9722011719572444e-05, "log_odds_chosen": 0.21564432978630066, "log_odds_ratio": -0.6583319902420044, "logits/chosen": 202.26856994628906, "logits/rejected": 193.0558624267578, "logps/chosen": -0.9100298881530762, "logps/rejected": -1.0561821460723877, "loss": 20.1611, "nll_loss": 1.0852024555206299, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4550149440765381, "rewards/margins": 0.07307618111371994, "rewards/rejected": -0.5280910730361938, "step": 205 }, { "epoch": 1.9905213270142181, "grad_norm": 23.625, "learning_rate": 1.8174260688798445e-05, "log_odds_chosen": 0.3166791498661041, "log_odds_ratio": -0.630929172039032, "logits/chosen": 197.60903930664062, "logits/rejected": 196.84121704101562, "logps/chosen": -0.821063220500946, "logps/rejected": -0.9948121905326843, "loss": 19.9686, "nll_loss": 1.0750689506530762, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.410531610250473, "rewards/margins": 0.08687452226877213, "rewards/rejected": -0.49740609526634216, "step": 210 }, { "epoch": 2.037914691943128, "grad_norm": 22.75, "learning_rate": 1.666293332634042e-05, "log_odds_chosen": 0.6822348833084106, "log_odds_ratio": -0.5266743898391724, "logits/chosen": 191.23080444335938, "logits/rejected": 194.97836303710938, "logps/chosen": -0.7306900024414062, "logps/rejected": -1.060121774673462, "loss": 18.0893, "nll_loss": 0.9460033178329468, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3653450012207031, "rewards/margins": 0.16471591591835022, "rewards/rejected": -0.530060887336731, "step": 215 }, { "epoch": 2.085308056872038, "grad_norm": 23.875, "learning_rate": 1.519268457047482e-05, "log_odds_chosen": 0.8683069944381714, "log_odds_ratio": -0.4624325633049011, "logits/chosen": 185.07095336914062, "logits/rejected": 190.39279174804688, "logps/chosen": -0.6318475008010864, "logps/rejected": -1.0846574306488037, "loss": 16.6833, "nll_loss": 0.8812177777290344, "rewards/accuracies": 0.8125, "rewards/chosen": -0.3159237504005432, "rewards/margins": 0.22640495002269745, "rewards/rejected": -0.5423287153244019, "step": 220 }, { "epoch": 2.132701421800948, "grad_norm": 18.75, "learning_rate": 1.3768042836010768e-05, "log_odds_chosen": 0.3730294704437256, "log_odds_ratio": -0.6350643038749695, "logits/chosen": 194.38063049316406, "logits/rejected": 189.1841583251953, "logps/chosen": -0.7411255836486816, "logps/rejected": -0.9265958070755005, "loss": 17.0913, "nll_loss": 1.006074070930481, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3705627918243408, "rewards/margins": 0.09273514896631241, "rewards/rejected": -0.46329790353775024, "step": 225 }, { "epoch": 2.1800947867298577, "grad_norm": 23.25, "learning_rate": 1.239339606662261e-05, "log_odds_chosen": 0.6575037240982056, "log_odds_ratio": -0.4991639256477356, "logits/chosen": 183.24179077148438, "logits/rejected": 185.40365600585938, "logps/chosen": -0.6491117477416992, "logps/rejected": -1.0063084363937378, "loss": 16.5076, "nll_loss": 0.8716222643852234, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3245558738708496, "rewards/margins": 0.17859837412834167, "rewards/rejected": -0.5031542181968689, "step": 230 }, { "epoch": 2.227488151658768, "grad_norm": 24.875, "learning_rate": 1.1072978219838283e-05, "log_odds_chosen": 0.4254986345767975, "log_odds_ratio": -0.5929109454154968, "logits/chosen": 181.78013610839844, "logits/rejected": 184.6556854248047, "logps/chosen": -0.707780122756958, "logps/rejected": -0.9049354791641235, "loss": 16.9862, "nll_loss": 0.9195895195007324, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.353890061378479, "rewards/margins": 0.09857770055532455, "rewards/rejected": -0.45246773958206177, "step": 235 }, { "epoch": 2.2748815165876777, "grad_norm": 21.0, "learning_rate": 9.810856226309972e-06, "log_odds_chosen": 0.8151445388793945, "log_odds_ratio": -0.45585957169532776, "logits/chosen": 182.42929077148438, "logits/rejected": 186.09323120117188, "logps/chosen": -0.6263293027877808, "logps/rejected": -1.0641155242919922, "loss": 16.7978, "nll_loss": 0.9048817753791809, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3131646513938904, "rewards/margins": 0.21889305114746094, "rewards/rejected": -0.5320577621459961, "step": 240 }, { "epoch": 2.322274881516588, "grad_norm": 23.375, "learning_rate": 8.61091746353324e-06, "log_odds_chosen": 0.6102806925773621, "log_odds_ratio": -0.5228442549705505, "logits/chosen": 184.25186157226562, "logits/rejected": 188.93673706054688, "logps/chosen": -0.6725679636001587, "logps/rejected": -0.954127311706543, "loss": 16.4777, "nll_loss": 0.9074475169181824, "rewards/accuracies": 0.75, "rewards/chosen": -0.33628398180007935, "rewards/margins": 0.14077970385551453, "rewards/rejected": -0.4770636558532715, "step": 245 }, { "epoch": 2.3696682464454977, "grad_norm": 18.125, "learning_rate": 7.47685778259568e-06, "log_odds_chosen": 0.8383617401123047, "log_odds_ratio": -0.45046114921569824, "logits/chosen": 183.37762451171875, "logits/rejected": 189.5059356689453, "logps/chosen": -0.6437116861343384, "logps/rejected": -1.0930787324905396, "loss": 16.4396, "nll_loss": 0.9055509567260742, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3218558430671692, "rewards/margins": 0.2246834933757782, "rewards/rejected": -0.5465393662452698, "step": 250 }, { "epoch": 2.4170616113744074, "grad_norm": 18.125, "learning_rate": 6.4121701248332905e-06, "log_odds_chosen": 0.6893147230148315, "log_odds_ratio": -0.5377334356307983, "logits/chosen": 179.131591796875, "logits/rejected": 181.28529357910156, "logps/chosen": -0.6199325323104858, "logps/rejected": -0.9626436233520508, "loss": 16.2759, "nll_loss": 0.854143500328064, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.3099662661552429, "rewards/margins": 0.17135553061962128, "rewards/rejected": -0.4813218116760254, "step": 255 }, { "epoch": 2.4644549763033177, "grad_norm": 19.5, "learning_rate": 5.420133763455645e-06, "log_odds_chosen": 0.6930850148200989, "log_odds_ratio": -0.5194807648658752, "logits/chosen": 179.89645385742188, "logits/rejected": 182.66842651367188, "logps/chosen": -0.6123950481414795, "logps/rejected": -0.9452868700027466, "loss": 16.4239, "nll_loss": 0.902696430683136, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.30619752407073975, "rewards/margins": 0.16644588112831116, "rewards/rejected": -0.4726434350013733, "step": 260 }, { "epoch": 2.5118483412322274, "grad_norm": 20.5, "learning_rate": 4.503804203275866e-06, "log_odds_chosen": 0.7105423212051392, "log_odds_ratio": -0.5525649189949036, "logits/chosen": 177.3004608154297, "logits/rejected": 179.8584747314453, "logps/chosen": -0.6415736079216003, "logps/rejected": -1.0120224952697754, "loss": 16.3394, "nll_loss": 0.8138397336006165, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.32078680396080017, "rewards/margins": 0.18522436916828156, "rewards/rejected": -0.5060112476348877, "step": 265 }, { "epoch": 2.5592417061611377, "grad_norm": 18.5, "learning_rate": 3.6660037696547376e-06, "log_odds_chosen": 0.725407063961029, "log_odds_ratio": -0.483724445104599, "logits/chosen": 181.6314239501953, "logits/rejected": 184.50576782226562, "logps/chosen": -0.6412969827651978, "logps/rejected": -0.9860894083976746, "loss": 16.5899, "nll_loss": 0.893083393573761, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3206484913825989, "rewards/margins": 0.1723962128162384, "rewards/rejected": -0.4930447041988373, "step": 270 }, { "epoch": 2.6066350710900474, "grad_norm": 19.625, "learning_rate": 2.909312915645238e-06, "log_odds_chosen": 0.6999877095222473, "log_odds_ratio": -0.4896921217441559, "logits/chosen": 179.7862548828125, "logits/rejected": 178.49549865722656, "logps/chosen": -0.6378815770149231, "logps/rejected": -0.9467176198959351, "loss": 16.7756, "nll_loss": 0.8352192640304565, "rewards/accuracies": 0.75, "rewards/chosen": -0.31894078850746155, "rewards/margins": 0.15441803634166718, "rewards/rejected": -0.47335880994796753, "step": 275 }, { "epoch": 2.654028436018957, "grad_norm": 23.25, "learning_rate": 2.236062274111741e-06, "log_odds_chosen": 0.7541594505310059, "log_odds_ratio": -0.5146032571792603, "logits/chosen": 178.07884216308594, "logits/rejected": 179.99327087402344, "logps/chosen": -0.6102009415626526, "logps/rejected": -1.0283238887786865, "loss": 15.7903, "nll_loss": 0.8353471755981445, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3051004707813263, "rewards/margins": 0.20906153321266174, "rewards/rejected": -0.5141619443893433, "step": 280 }, { "epoch": 2.7014218009478674, "grad_norm": 22.5, "learning_rate": 1.648325479303684e-06, "log_odds_chosen": 0.6386028528213501, "log_odds_ratio": -0.5239256024360657, "logits/chosen": 181.93246459960938, "logits/rejected": 183.00357055664062, "logps/chosen": -0.5961137413978577, "logps/rejected": -0.9210435748100281, "loss": 16.5912, "nll_loss": 0.8747022747993469, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.29805687069892883, "rewards/margins": 0.162464901804924, "rewards/rejected": -0.46052178740501404, "step": 285 }, { "epoch": 2.748815165876777, "grad_norm": 23.0, "learning_rate": 1.1479127799935029e-06, "log_odds_chosen": 0.6820887327194214, "log_odds_ratio": -0.5130306482315063, "logits/chosen": 180.50137329101562, "logits/rejected": 187.8414764404297, "logps/chosen": -0.6403064727783203, "logps/rejected": -0.9878012537956238, "loss": 16.6567, "nll_loss": 0.873367190361023, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.32015323638916016, "rewards/margins": 0.17374737560749054, "rewards/rejected": -0.4939006268978119, "step": 290 }, { "epoch": 2.7962085308056874, "grad_norm": 33.5, "learning_rate": 7.363654638505046e-07, "log_odds_chosen": 0.8129827380180359, "log_odds_ratio": -0.45822620391845703, "logits/chosen": 181.46929931640625, "logits/rejected": 186.03634643554688, "logps/chosen": -0.6228169202804565, "logps/rejected": -1.0206798315048218, "loss": 16.5789, "nll_loss": 0.825291633605957, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.31140846014022827, "rewards/margins": 0.1989315003156662, "rewards/rejected": -0.5103399157524109, "step": 295 }, { "epoch": 2.843601895734597, "grad_norm": 20.25, "learning_rate": 4.149511102238568e-07, "log_odds_chosen": 0.6022200584411621, "log_odds_ratio": -0.5112254023551941, "logits/chosen": 186.76828002929688, "logits/rejected": 184.95945739746094, "logps/chosen": -0.656291127204895, "logps/rejected": -0.963117241859436, "loss": 16.9259, "nll_loss": 0.9450982809066772, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3281455636024475, "rewards/margins": 0.1534130871295929, "rewards/rejected": -0.481558620929718, "step": 300 }, { "epoch": 2.890995260663507, "grad_norm": 18.75, "learning_rate": 1.8465968595625105e-07, "log_odds_chosen": 0.7331669926643372, "log_odds_ratio": -0.4934759736061096, "logits/chosen": 180.53512573242188, "logits/rejected": 180.0854949951172, "logps/chosen": -0.6695073843002319, "logps/rejected": -1.0153210163116455, "loss": 16.1073, "nll_loss": 0.8310354948043823, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.33475369215011597, "rewards/margins": 0.17290683090686798, "rewards/rejected": -0.5076605081558228, "step": 305 }, { "epoch": 2.938388625592417, "grad_norm": 19.875, "learning_rate": 4.620049625329803e-08, "log_odds_chosen": 0.8787549138069153, "log_odds_ratio": -0.4447788596153259, "logits/chosen": 182.63246154785156, "logits/rejected": 181.45892333984375, "logps/chosen": -0.6264249682426453, "logps/rejected": -1.0081883668899536, "loss": 16.5378, "nll_loss": 0.8261914253234863, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.31321248412132263, "rewards/margins": 0.19088168442249298, "rewards/rejected": -0.5040941834449768, "step": 310 }, { "epoch": 2.985781990521327, "grad_norm": 21.75, "learning_rate": 0.0, "log_odds_chosen": 0.7384462952613831, "log_odds_ratio": -0.4752270579338074, "logits/chosen": 184.30104064941406, "logits/rejected": 181.8874053955078, "logps/chosen": -0.6386845707893372, "logps/rejected": -1.0048197507858276, "loss": 16.1077, "nll_loss": 0.8921818733215332, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3193422853946686, "rewards/margins": 0.18306761980056763, "rewards/rejected": -0.5024098753929138, "step": 315 }, { "epoch": 2.985781990521327, "step": 315, "total_flos": 0.0, "train_loss": 127.4701649257115, "train_runtime": 3752.5983, "train_samples_per_second": 5.396, "train_steps_per_second": 0.084 } ], "logging_steps": 5, "max_steps": 315, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }