diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3823 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999607072691552, + "eval_steps": 100, + "global_step": 1908, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.005239030779305829, + "grad_norm": 40.089039871992924, + "learning_rate": 3.6649214659685864e-07, + "log_odds_chosen": 0.40882301330566406, + "log_odds_ratio": -0.7743430137634277, + "logits/chosen": -1.251479148864746, + "logits/rejected": -1.348970651626587, + "logps/chosen": -1.8760229349136353, + "logps/rejected": -2.220275402069092, + "loss": 9.7709, + "nll_loss": 9.695734977722168, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.018760228529572487, + "rewards/margins": 0.00344252772629261, + "rewards/rejected": -0.022202756255865097, + "step": 10 + }, + { + "epoch": 0.010478061558611657, + "grad_norm": 41.48632386237846, + "learning_rate": 7.329842931937173e-07, + "log_odds_chosen": 0.2817743718624115, + "log_odds_ratio": -0.8707199096679688, + "logits/chosen": -1.3488132953643799, + "logits/rejected": -1.333653211593628, + "logps/chosen": -2.0333549976348877, + "logps/rejected": -2.2923362255096436, + "loss": 9.7365, + "nll_loss": 10.180292129516602, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.02033355087041855, + "rewards/margins": 0.00258981017395854, + "rewards/rejected": -0.022923361510038376, + "step": 20 + }, + { + "epoch": 0.015717092337917484, + "grad_norm": 38.86915439525821, + "learning_rate": 1.0994764397905759e-06, + "log_odds_chosen": 0.10666105896234512, + "log_odds_ratio": -0.879582405090332, + "logits/chosen": -1.411980390548706, + "logits/rejected": -1.4946210384368896, + "logps/chosen": -1.9859802722930908, + "logps/rejected": -2.066720724105835, + "loss": 9.2475, + "nll_loss": 9.394262313842773, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.019859803840517998, + "rewards/margins": 0.0008074056240729988, + "rewards/rejected": -0.02066720835864544, + "step": 30 + }, + { + "epoch": 0.020956123117223315, + "grad_norm": 64.36060245058518, + "learning_rate": 1.4659685863874346e-06, + "log_odds_chosen": 0.19943363964557648, + "log_odds_ratio": -0.9107095003128052, + "logits/chosen": -1.2995336055755615, + "logits/rejected": -1.3503811359405518, + "logps/chosen": -1.8726139068603516, + "logps/rejected": -2.0438897609710693, + "loss": 7.7077, + "nll_loss": 7.663236141204834, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.018726136535406113, + "rewards/margins": 0.0017127618193626404, + "rewards/rejected": -0.020438898354768753, + "step": 40 + }, + { + "epoch": 0.02619515389652914, + "grad_norm": 41.110372744682586, + "learning_rate": 1.8324607329842933e-06, + "log_odds_chosen": 0.06553123891353607, + "log_odds_ratio": -0.9510926008224487, + "logits/chosen": -0.8678807020187378, + "logits/rejected": -0.8854366540908813, + "logps/chosen": -2.0910544395446777, + "logps/rejected": -2.1262149810791016, + "loss": 4.4672, + "nll_loss": 4.56087589263916, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.020910542458295822, + "rewards/margins": 0.0003516075958032161, + "rewards/rejected": -0.02126215025782585, + "step": 50 + }, + { + "epoch": 0.03143418467583497, + "grad_norm": 16.651527410211266, + "learning_rate": 2.1989528795811517e-06, + "log_odds_chosen": 0.2586204707622528, + "log_odds_ratio": -0.7958043813705444, + "logits/chosen": -0.2105911523103714, + "logits/rejected": -0.19482675194740295, + "logps/chosen": -1.780350923538208, + "logps/rejected": -2.0165352821350098, + "loss": 2.0977, + "nll_loss": 2.121706008911133, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.017803508788347244, + "rewards/margins": 0.0023618421982973814, + "rewards/rejected": -0.020165350288152695, + "step": 60 + }, + { + "epoch": 0.0366732154551408, + "grad_norm": 0.6950743796393763, + "learning_rate": 2.5654450261780104e-06, + "log_odds_chosen": 0.40901675820350647, + "log_odds_ratio": -0.8085654973983765, + "logits/chosen": -0.22529537975788116, + "logits/rejected": -0.187855526804924, + "logps/chosen": -1.7872791290283203, + "logps/rejected": -2.1754517555236816, + "loss": 1.1033, + "nll_loss": 1.104498028755188, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.01787278801202774, + "rewards/margins": 0.003881725948303938, + "rewards/rejected": -0.021754514425992966, + "step": 70 + }, + { + "epoch": 0.04191224623444663, + "grad_norm": 0.5424625090833263, + "learning_rate": 2.931937172774869e-06, + "log_odds_chosen": 0.2518347501754761, + "log_odds_ratio": -0.8060741424560547, + "logits/chosen": -0.38638219237327576, + "logits/rejected": -0.37130922079086304, + "logps/chosen": -1.6561933755874634, + "logps/rejected": -1.8596878051757812, + "loss": 0.9622, + "nll_loss": 0.9561271667480469, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.016561932861804962, + "rewards/margins": 0.00203494424931705, + "rewards/rejected": -0.01859687827527523, + "step": 80 + }, + { + "epoch": 0.047151277013752456, + "grad_norm": 0.49692784970495385, + "learning_rate": 3.298429319371728e-06, + "log_odds_chosen": 0.25015050172805786, + "log_odds_ratio": -0.7168025970458984, + "logits/chosen": -0.46545910835266113, + "logits/rejected": -0.47551122307777405, + "logps/chosen": -1.434188723564148, + "logps/rejected": -1.617444634437561, + "loss": 0.9076, + "nll_loss": 0.9012999534606934, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.014341888949275017, + "rewards/margins": 0.0018325571436434984, + "rewards/rejected": -0.016174444928765297, + "step": 90 + }, + { + "epoch": 0.05239030779305828, + "grad_norm": 0.34409399893883913, + "learning_rate": 3.6649214659685865e-06, + "log_odds_chosen": 0.33899450302124023, + "log_odds_ratio": -0.6892405152320862, + "logits/chosen": -0.42995914816856384, + "logits/rejected": -0.46385449171066284, + "logps/chosen": -1.3845367431640625, + "logps/rejected": -1.6512413024902344, + "loss": 0.8633, + "nll_loss": 0.8542642593383789, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.013845366425812244, + "rewards/margins": 0.00266704591922462, + "rewards/rejected": -0.016512412577867508, + "step": 100 + }, + { + "epoch": 0.05239030779305828, + "eval_log_odds_chosen": 0.2912975251674652, + "eval_log_odds_ratio": -0.6964674592018127, + "eval_logits/chosen": -0.4465982913970947, + "eval_logits/rejected": -0.4503002464771271, + "eval_logps/chosen": -1.3476332426071167, + "eval_logps/rejected": -1.5779348611831665, + "eval_loss": 0.7180835008621216, + "eval_nll_loss": 0.7125721573829651, + "eval_rewards/accuracies": 0.6060000061988831, + "eval_rewards/chosen": -0.01347633171826601, + "eval_rewards/margins": 0.0023030168376863003, + "eval_rewards/rejected": -0.015779349952936172, + "eval_runtime": 269.9338, + "eval_samples_per_second": 7.406, + "eval_steps_per_second": 0.463, + "step": 100 + }, + { + "epoch": 0.05762933857236411, + "grad_norm": 0.2803398283753954, + "learning_rate": 4.031413612565445e-06, + "log_odds_chosen": 0.18521742522716522, + "log_odds_ratio": -0.7459183931350708, + "logits/chosen": -0.41053661704063416, + "logits/rejected": -0.4239114820957184, + "logps/chosen": -1.3022041320800781, + "logps/rejected": -1.4337875843048096, + "loss": 0.8606, + "nll_loss": 0.8364461064338684, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.013022040948271751, + "rewards/margins": 0.0013158348156139255, + "rewards/rejected": -0.014337876811623573, + "step": 110 + }, + { + "epoch": 0.06286836935166994, + "grad_norm": 0.27279868153391584, + "learning_rate": 4.3979057591623035e-06, + "log_odds_chosen": 0.3604566752910614, + "log_odds_ratio": -0.6311579942703247, + "logits/chosen": -0.42637091875076294, + "logits/rejected": -0.42720526456832886, + "logps/chosen": -1.2974519729614258, + "logps/rejected": -1.5656707286834717, + "loss": 0.8425, + "nll_loss": 0.8501418232917786, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.012974520213901997, + "rewards/margins": 0.0026821885257959366, + "rewards/rejected": -0.01565670594573021, + "step": 120 + }, + { + "epoch": 0.06810740013097577, + "grad_norm": 0.30807142716511443, + "learning_rate": 4.764397905759163e-06, + "log_odds_chosen": 0.35572677850723267, + "log_odds_ratio": -0.6256042718887329, + "logits/chosen": -0.4055427610874176, + "logits/rejected": -0.41731196641921997, + "logps/chosen": -1.1877421140670776, + "logps/rejected": -1.4562331438064575, + "loss": 0.9061, + "nll_loss": 0.9138392210006714, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.011877421289682388, + "rewards/margins": 0.0026849093846976757, + "rewards/rejected": -0.014562331140041351, + "step": 130 + }, + { + "epoch": 0.0733464309102816, + "grad_norm": 0.21901353529181275, + "learning_rate": 5.130890052356021e-06, + "log_odds_chosen": 0.20408260822296143, + "log_odds_ratio": -0.7225680351257324, + "logits/chosen": -0.3313930630683899, + "logits/rejected": -0.3637652099132538, + "logps/chosen": -1.1521110534667969, + "logps/rejected": -1.3134852647781372, + "loss": 0.8555, + "nll_loss": 0.8377349972724915, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.011521109379827976, + "rewards/margins": 0.0016137424390763044, + "rewards/rejected": -0.013134850189089775, + "step": 140 + }, + { + "epoch": 0.07858546168958742, + "grad_norm": 0.21331922770164838, + "learning_rate": 5.49738219895288e-06, + "log_odds_chosen": 0.28065016865730286, + "log_odds_ratio": -0.6753939390182495, + "logits/chosen": -0.3591609597206116, + "logits/rejected": -0.3576233983039856, + "logps/chosen": -1.0449597835540771, + "logps/rejected": -1.2448740005493164, + "loss": 0.7812, + "nll_loss": 0.7689298987388611, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.010449598543345928, + "rewards/margins": 0.001999142114073038, + "rewards/rejected": -0.012448740191757679, + "step": 150 + }, + { + "epoch": 0.08382449246889326, + "grad_norm": 0.23877612948281643, + "learning_rate": 5.863874345549738e-06, + "log_odds_chosen": 0.17812715470790863, + "log_odds_ratio": -0.7157899141311646, + "logits/chosen": -0.34616202116012573, + "logits/rejected": -0.32302820682525635, + "logps/chosen": -1.1274374723434448, + "logps/rejected": -1.2492964267730713, + "loss": 0.8319, + "nll_loss": 0.8008116483688354, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.011274375021457672, + "rewards/margins": 0.0012185879750177264, + "rewards/rejected": -0.012492964044213295, + "step": 160 + }, + { + "epoch": 0.08906352324819908, + "grad_norm": 0.16490503339477303, + "learning_rate": 6.230366492146597e-06, + "log_odds_chosen": 0.35694795846939087, + "log_odds_ratio": -0.6470257043838501, + "logits/chosen": -0.3833572566509247, + "logits/rejected": -0.3738633394241333, + "logps/chosen": -1.098602056503296, + "logps/rejected": -1.3455427885055542, + "loss": 0.8428, + "nll_loss": 0.777732253074646, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.010986020788550377, + "rewards/margins": 0.0024694064632058144, + "rewards/rejected": -0.013455428183078766, + "step": 170 + }, + { + "epoch": 0.09430255402750491, + "grad_norm": 0.19976560242149322, + "learning_rate": 6.596858638743456e-06, + "log_odds_chosen": 0.21126070618629456, + "log_odds_ratio": -0.6887334585189819, + "logits/chosen": -0.36508241295814514, + "logits/rejected": -0.35655850172042847, + "logps/chosen": -1.1599535942077637, + "logps/rejected": -1.3315963745117188, + "loss": 0.8363, + "nll_loss": 0.8872919082641602, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.01159953698515892, + "rewards/margins": 0.001716427505016327, + "rewards/rejected": -0.013315962627530098, + "step": 180 + }, + { + "epoch": 0.09954158480681075, + "grad_norm": 0.28624827514651513, + "learning_rate": 6.963350785340315e-06, + "log_odds_chosen": 0.23439832031726837, + "log_odds_ratio": -0.6879830956459045, + "logits/chosen": -0.3974788784980774, + "logits/rejected": -0.3820292353630066, + "logps/chosen": -1.1166422367095947, + "logps/rejected": -1.3017523288726807, + "loss": 0.817, + "nll_loss": 0.7972527146339417, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.011166421696543694, + "rewards/margins": 0.0018511017551645637, + "rewards/rejected": -0.013017524965107441, + "step": 190 + }, + { + "epoch": 0.10478061558611657, + "grad_norm": 0.2323297919875336, + "learning_rate": 6.999525460456016e-06, + "log_odds_chosen": 0.20863866806030273, + "log_odds_ratio": -0.6829880475997925, + "logits/chosen": -0.3505745530128479, + "logits/rejected": -0.37228649854660034, + "logps/chosen": -1.0812726020812988, + "logps/rejected": -1.2249776124954224, + "loss": 0.7831, + "nll_loss": 0.7905929684638977, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.010812724940478802, + "rewards/margins": 0.0014370509888976812, + "rewards/rejected": -0.012249777093529701, + "step": 200 + }, + { + "epoch": 0.10478061558611657, + "eval_log_odds_chosen": 0.2690645754337311, + "eval_log_odds_ratio": -0.6627397537231445, + "eval_logits/chosen": -0.36186957359313965, + "eval_logits/rejected": -0.3621442914009094, + "eval_logps/chosen": -1.0519521236419678, + "eval_logps/rejected": -1.2498859167099, + "eval_loss": 0.6487022042274475, + "eval_nll_loss": 0.6431540846824646, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.010519521310925484, + "eval_rewards/margins": 0.0019793356768786907, + "eval_rewards/rejected": -0.012498857453465462, + "eval_runtime": 268.3833, + "eval_samples_per_second": 7.448, + "eval_steps_per_second": 0.466, + "step": 200 + }, + { + "epoch": 0.1100196463654224, + "grad_norm": 0.22085594878634468, + "learning_rate": 6.997885242050564e-06, + "log_odds_chosen": 0.14821310341358185, + "log_odds_ratio": -0.7308140993118286, + "logits/chosen": -0.35621774196624756, + "logits/rejected": -0.34287530183792114, + "logps/chosen": -1.0816371440887451, + "logps/rejected": -1.1855530738830566, + "loss": 0.7606, + "nll_loss": 0.7189425826072693, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010816371068358421, + "rewards/margins": 0.0010391605319455266, + "rewards/rejected": -0.0118555324152112, + "step": 210 + }, + { + "epoch": 0.11525867714472822, + "grad_norm": 0.20718906330906176, + "learning_rate": 6.9950740352365535e-06, + "log_odds_chosen": 0.13336250185966492, + "log_odds_ratio": -0.7418603897094727, + "logits/chosen": -0.31412142515182495, + "logits/rejected": -0.29769355058670044, + "logps/chosen": -1.0191659927368164, + "logps/rejected": -1.1312214136123657, + "loss": 0.7423, + "nll_loss": 0.7373214364051819, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.010191659443080425, + "rewards/margins": 0.0011205542832612991, + "rewards/rejected": -0.011312213726341724, + "step": 220 + }, + { + "epoch": 0.12049770792403405, + "grad_norm": 0.21517650420593173, + "learning_rate": 6.991092781122789e-06, + "log_odds_chosen": 0.21017885208129883, + "log_odds_ratio": -0.6870851516723633, + "logits/chosen": -0.34512776136398315, + "logits/rejected": -0.32372525334358215, + "logps/chosen": -1.1044729948043823, + "logps/rejected": -1.2339098453521729, + "loss": 0.7789, + "nll_loss": 0.7626355886459351, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.011044728569686413, + "rewards/margins": 0.0012943701585754752, + "rewards/rejected": -0.012339098379015923, + "step": 230 + }, + { + "epoch": 0.12573673870333987, + "grad_norm": 0.196338942404361, + "learning_rate": 6.985942812515264e-06, + "log_odds_chosen": 0.22509415447711945, + "log_odds_ratio": -0.6694994568824768, + "logits/chosen": -0.33054572343826294, + "logits/rejected": -0.34040743112564087, + "logps/chosen": -1.0536653995513916, + "logps/rejected": -1.2156822681427002, + "loss": 0.7989, + "nll_loss": 0.8106544613838196, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.010536652989685535, + "rewards/margins": 0.0016201699618250132, + "rewards/rejected": -0.01215682178735733, + "step": 240 + }, + { + "epoch": 0.13097576948264572, + "grad_norm": 0.19854612692373078, + "learning_rate": 6.9796258534709805e-06, + "log_odds_chosen": 0.26194605231285095, + "log_odds_ratio": -0.677527904510498, + "logits/chosen": -0.3183462917804718, + "logits/rejected": -0.3198302686214447, + "logps/chosen": -1.0683705806732178, + "logps/rejected": -1.2614667415618896, + "loss": 0.7328, + "nll_loss": 0.7110816240310669, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010683706030249596, + "rewards/margins": 0.0019309620838612318, + "rewards/rejected": -0.012614667415618896, + "step": 250 + }, + { + "epoch": 0.13621480026195154, + "grad_norm": 0.28276448420310984, + "learning_rate": 6.972144018720786e-06, + "log_odds_chosen": 0.08540093898773193, + "log_odds_ratio": -0.762796938419342, + "logits/chosen": -0.33047622442245483, + "logits/rejected": -0.30570656061172485, + "logps/chosen": -1.079242467880249, + "logps/rejected": -1.140084981918335, + "loss": 0.763, + "nll_loss": 0.7699041366577148, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.010792424902319908, + "rewards/margins": 0.0006084250053390861, + "rewards/rejected": -0.011400849558413029, + "step": 260 + }, + { + "epoch": 0.14145383104125736, + "grad_norm": 0.17805276020084845, + "learning_rate": 6.96349981296142e-06, + "log_odds_chosen": 0.30828922986984253, + "log_odds_ratio": -0.6861331462860107, + "logits/chosen": -0.28029608726501465, + "logits/rejected": -0.3021364212036133, + "logps/chosen": -1.0115910768508911, + "logps/rejected": -1.246225118637085, + "loss": 0.7833, + "nll_loss": 0.7348794937133789, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.01011591125279665, + "rewards/margins": 0.0023463410325348377, + "rewards/rejected": -0.012462252750992775, + "step": 270 + }, + { + "epoch": 0.1466928618205632, + "grad_norm": 0.16178698706252137, + "learning_rate": 6.953696130017022e-06, + "log_odds_chosen": 0.15516668558120728, + "log_odds_ratio": -0.7157121896743774, + "logits/chosen": -0.31760409474372864, + "logits/rejected": -0.31049543619155884, + "logps/chosen": -1.0282506942749023, + "logps/rejected": -1.1352583169937134, + "loss": 0.7233, + "nll_loss": 0.7511364817619324, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.010282507166266441, + "rewards/margins": 0.0010700763668864965, + "rewards/rejected": -0.011352581903338432, + "step": 280 + }, + { + "epoch": 0.15193189259986903, + "grad_norm": 0.1924395788457037, + "learning_rate": 6.94273625187036e-06, + "log_odds_chosen": 0.33385169506073, + "log_odds_ratio": -0.6219146251678467, + "logits/chosen": -0.3122417628765106, + "logits/rejected": -0.3353222906589508, + "logps/chosen": -0.9765304327011108, + "logps/rejected": -1.193291425704956, + "loss": 0.7901, + "nll_loss": 0.7909854650497437, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.009765303693711758, + "rewards/margins": 0.0021676109172403812, + "rewards/rejected": -0.011932915076613426, + "step": 290 + }, + { + "epoch": 0.15717092337917485, + "grad_norm": 0.34259992418933566, + "learning_rate": 6.9306238475641205e-06, + "log_odds_chosen": 0.2912523150444031, + "log_odds_ratio": -0.6484606266021729, + "logits/chosen": -0.2717845141887665, + "logits/rejected": -0.3028518557548523, + "logps/chosen": -1.0336506366729736, + "logps/rejected": -1.2281897068023682, + "loss": 0.7146, + "nll_loss": 0.6968905329704285, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.010336506180465221, + "rewards/margins": 0.0019453916465863585, + "rewards/rejected": -0.012281898409128189, + "step": 300 + }, + { + "epoch": 0.15717092337917485, + "eval_log_odds_chosen": 0.27896934747695923, + "eval_log_odds_ratio": -0.659382164478302, + "eval_logits/chosen": -0.3169032633304596, + "eval_logits/rejected": -0.3196420669555664, + "eval_logps/chosen": -1.0173438787460327, + "eval_logps/rejected": -1.2193564176559448, + "eval_loss": 0.6237765550613403, + "eval_nll_loss": 0.6180580854415894, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.010173438116908073, + "eval_rewards/margins": 0.002020125975832343, + "eval_rewards/rejected": -0.01219356432557106, + "eval_runtime": 277.9482, + "eval_samples_per_second": 7.192, + "eval_steps_per_second": 0.45, + "step": 300 + }, + { + "epoch": 0.16240995415848067, + "grad_norm": 0.16276041420565796, + "learning_rate": 6.917362971972625e-06, + "log_odds_chosen": 0.2336260974407196, + "log_odds_ratio": -0.6989104151725769, + "logits/chosen": -0.28494778275489807, + "logits/rejected": -0.3021838068962097, + "logps/chosen": -1.0360081195831299, + "logps/rejected": -1.2258890867233276, + "loss": 0.7523, + "nll_loss": 0.7411647439002991, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.010360080748796463, + "rewards/margins": 0.0018988108495250344, + "rewards/rejected": -0.012258890084922314, + "step": 310 + }, + { + "epoch": 0.16764898493778652, + "grad_norm": 0.15636902004729206, + "learning_rate": 6.902958064444372e-06, + "log_odds_chosen": 0.25901222229003906, + "log_odds_ratio": -0.6495088338851929, + "logits/chosen": -0.3297797739505768, + "logits/rejected": -0.32505810260772705, + "logps/chosen": -1.0110734701156616, + "logps/rejected": -1.1903458833694458, + "loss": 0.785, + "nll_loss": 0.788983166217804, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.010110734961926937, + "rewards/margins": 0.0017927236622199416, + "rewards/rejected": -0.011903459206223488, + "step": 320 + }, + { + "epoch": 0.17288801571709234, + "grad_norm": 0.16465507987416486, + "learning_rate": 6.8874139473158825e-06, + "log_odds_chosen": 0.50341796875, + "log_odds_ratio": -0.5877692103385925, + "logits/chosen": -0.33102065324783325, + "logits/rejected": -0.33758553862571716, + "logps/chosen": -1.0253140926361084, + "logps/rejected": -1.4051698446273804, + "loss": 0.772, + "nll_loss": 0.7442210912704468, + "rewards/accuracies": 0.731249988079071, + "rewards/chosen": -0.01025314163416624, + "rewards/margins": 0.0037985569797456264, + "rewards/rejected": -0.01405169814825058, + "step": 330 + }, + { + "epoch": 0.17812704649639816, + "grad_norm": 0.2553378662120776, + "learning_rate": 6.870735824297317e-06, + "log_odds_chosen": 0.0705319195985794, + "log_odds_ratio": -0.7645262479782104, + "logits/chosen": -0.2785702049732208, + "logits/rejected": -0.28481778502464294, + "logps/chosen": -1.1028659343719482, + "logps/rejected": -1.1618068218231201, + "loss": 0.7605, + "nll_loss": 0.7672589421272278, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.011028659529983997, + "rewards/margins": 0.0005894082714803517, + "rewards/rejected": -0.011618068441748619, + "step": 340 + }, + { + "epoch": 0.183366077275704, + "grad_norm": 0.18113875877671343, + "learning_rate": 6.852929278730433e-06, + "log_odds_chosen": 0.14474061131477356, + "log_odds_ratio": -0.7329230308532715, + "logits/chosen": -0.2613077461719513, + "logits/rejected": -0.2920432984828949, + "logps/chosen": -0.9968924522399902, + "logps/rejected": -1.098439335823059, + "loss": 0.7298, + "nll_loss": 0.6963182091712952, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.009968924336135387, + "rewards/margins": 0.0010154687333852053, + "rewards/rejected": -0.010984392836689949, + "step": 350 + }, + { + "epoch": 0.18860510805500982, + "grad_norm": 0.19400999564615143, + "learning_rate": 6.834000271719443e-06, + "log_odds_chosen": 0.2884977459907532, + "log_odds_ratio": -0.6588489413261414, + "logits/chosen": -0.3067547678947449, + "logits/rejected": -0.31717801094055176, + "logps/chosen": -1.0352160930633545, + "logps/rejected": -1.2498157024383545, + "loss": 0.7559, + "nll_loss": 0.7783513069152832, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.010352160781621933, + "rewards/margins": 0.002145996782928705, + "rewards/rejected": -0.012498157098889351, + "step": 360 + }, + { + "epoch": 0.19384413883431564, + "grad_norm": 0.1839111513162734, + "learning_rate": 6.813955140135418e-06, + "log_odds_chosen": 0.25216466188430786, + "log_odds_ratio": -0.6868494749069214, + "logits/chosen": -0.2936992347240448, + "logits/rejected": -0.2800842523574829, + "logps/chosen": -1.0396112203598022, + "logps/rejected": -1.2321968078613281, + "loss": 0.7315, + "nll_loss": 0.6916857957839966, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.010396112687885761, + "rewards/margins": 0.0019258556421846151, + "rewards/rejected": -0.01232196670025587, + "step": 370 + }, + { + "epoch": 0.1990831696136215, + "grad_norm": 0.20686182443960535, + "learning_rate": 6.7928005944948864e-06, + "log_odds_chosen": 0.18967841565608978, + "log_odds_ratio": -0.7045271992683411, + "logits/chosen": -0.27514809370040894, + "logits/rejected": -0.2739102840423584, + "logps/chosen": -1.0092417001724243, + "logps/rejected": -1.1560665369033813, + "loss": 0.74, + "nll_loss": 0.7480857968330383, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010092416778206825, + "rewards/margins": 0.001468247384764254, + "rewards/rejected": -0.011560664512217045, + "step": 380 + }, + { + "epoch": 0.2043222003929273, + "grad_norm": 0.19077789339143533, + "learning_rate": 6.770543716713352e-06, + "log_odds_chosen": 0.2070658951997757, + "log_odds_ratio": -0.703337550163269, + "logits/chosen": -0.29966622591018677, + "logits/rejected": -0.29987436532974243, + "logps/chosen": -1.0798876285552979, + "logps/rejected": -1.2502596378326416, + "loss": 0.7795, + "nll_loss": 0.7723098993301392, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.01079887617379427, + "rewards/margins": 0.001703719375655055, + "rewards/rejected": -0.012502595782279968, + "step": 390 + }, + { + "epoch": 0.20956123117223313, + "grad_norm": 0.19361123064559435, + "learning_rate": 6.747191957734486e-06, + "log_odds_chosen": 0.15251222252845764, + "log_odds_ratio": -0.7213733196258545, + "logits/chosen": -0.2669166922569275, + "logits/rejected": -0.2855262756347656, + "logps/chosen": -1.0066086053848267, + "logps/rejected": -1.1335278749465942, + "loss": 0.7361, + "nll_loss": 0.6917256116867065, + "rewards/accuracies": 0.48124998807907104, + "rewards/chosen": -0.010066085495054722, + "rewards/margins": 0.0012691912706941366, + "rewards/rejected": -0.011335276998579502, + "step": 400 + }, + { + "epoch": 0.20956123117223313, + "eval_log_odds_chosen": 0.27702653408050537, + "eval_log_odds_ratio": -0.6617660522460938, + "eval_logits/chosen": -0.2810555696487427, + "eval_logits/rejected": -0.284096360206604, + "eval_logps/chosen": -1.0014485120773315, + "eval_logps/rejected": -1.2012392282485962, + "eval_loss": 0.6136931777000427, + "eval_nll_loss": 0.6077669858932495, + "eval_rewards/accuracies": 0.6140000224113464, + "eval_rewards/chosen": -0.010014484636485577, + "eval_rewards/margins": 0.001997907180339098, + "eval_rewards/rejected": -0.012012392282485962, + "eval_runtime": 269.6649, + "eval_samples_per_second": 7.413, + "eval_steps_per_second": 0.464, + "step": 400 + }, + { + "epoch": 0.21480026195153898, + "grad_norm": 0.24070277239270585, + "learning_rate": 6.7227531350357585e-06, + "log_odds_chosen": 0.48085784912109375, + "log_odds_ratio": -0.5908285975456238, + "logits/chosen": -0.27427297830581665, + "logits/rejected": -0.2949586510658264, + "logps/chosen": -0.972133994102478, + "logps/rejected": -1.2912139892578125, + "loss": 0.7185, + "nll_loss": 0.7345937490463257, + "rewards/accuracies": 0.6875, + "rewards/chosen": -0.00972133968025446, + "rewards/margins": 0.0031908005475997925, + "rewards/rejected": -0.012912139296531677, + "step": 410 + }, + { + "epoch": 0.2200392927308448, + "grad_norm": 0.2315925815874692, + "learning_rate": 6.697235430011389e-06, + "log_odds_chosen": 0.2700832486152649, + "log_odds_ratio": -0.6809287667274475, + "logits/chosen": -0.2654404044151306, + "logits/rejected": -0.2597273290157318, + "logps/chosen": -0.9692566990852356, + "logps/rejected": -1.1582214832305908, + "loss": 0.7136, + "nll_loss": 0.6900116801261902, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.009692566469311714, + "rewards/margins": 0.0018896475667133927, + "rewards/rejected": -0.011582214385271072, + "step": 420 + }, + { + "epoch": 0.22527832351015062, + "grad_norm": 0.1877491685270872, + "learning_rate": 6.670647385233456e-06, + "log_odds_chosen": 0.2508837580680847, + "log_odds_ratio": -0.7000848054885864, + "logits/chosen": -0.2642936706542969, + "logits/rejected": -0.26817911863327026, + "logps/chosen": -1.0234228372573853, + "logps/rejected": -1.2276101112365723, + "loss": 0.7568, + "nll_loss": 0.7772089242935181, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.010234227403998375, + "rewards/margins": 0.002041872590780258, + "rewards/rejected": -0.012276100926101208, + "step": 430 + }, + { + "epoch": 0.23051735428945644, + "grad_norm": 0.31875972452356033, + "learning_rate": 6.642997901592093e-06, + "log_odds_chosen": 0.15048085153102875, + "log_odds_ratio": -0.703689694404602, + "logits/chosen": -0.2521992325782776, + "logits/rejected": -0.24312739074230194, + "logps/chosen": -1.0188218355178833, + "logps/rejected": -1.1303977966308594, + "loss": 0.7633, + "nll_loss": 0.7371809482574463, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.010188218206167221, + "rewards/margins": 0.0011157591361552477, + "rewards/rejected": -0.01130397617816925, + "step": 440 + }, + { + "epoch": 0.2357563850687623, + "grad_norm": 0.25010097086107463, + "learning_rate": 6.614296235315736e-06, + "log_odds_chosen": 0.3179778754711151, + "log_odds_ratio": -0.6758134365081787, + "logits/chosen": -0.3085033595561981, + "logits/rejected": -0.3148275315761566, + "logps/chosen": -1.0270577669143677, + "logps/rejected": -1.2768559455871582, + "loss": 0.7733, + "nll_loss": 0.788577675819397, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.010270575992763042, + "rewards/margins": 0.0024979840964078903, + "rewards/rejected": -0.012768561020493507, + "step": 450 + }, + { + "epoch": 0.2409954158480681, + "grad_norm": 0.2124434061946913, + "learning_rate": 6.584551994872414e-06, + "log_odds_chosen": 0.1259727030992508, + "log_odds_ratio": -0.7334306836128235, + "logits/chosen": -0.28063657879829407, + "logits/rejected": -0.26131364703178406, + "logps/chosen": -1.0509703159332275, + "logps/rejected": -1.145269513130188, + "loss": 0.7295, + "nll_loss": 0.6992601752281189, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.010509702377021313, + "rewards/margins": 0.000942991697229445, + "rewards/rejected": -0.011452694423496723, + "step": 460 + }, + { + "epoch": 0.24623444662737393, + "grad_norm": 0.21593583192713314, + "learning_rate": 6.553775137753117e-06, + "log_odds_chosen": 0.11253416538238525, + "log_odds_ratio": -0.7634598016738892, + "logits/chosen": -0.28993192315101624, + "logits/rejected": -0.28420716524124146, + "logps/chosen": -1.0563971996307373, + "logps/rejected": -1.1430349349975586, + "loss": 0.7283, + "nll_loss": 0.7201040983200073, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.010563971474766731, + "rewards/margins": 0.0008663767948746681, + "rewards/rejected": -0.0114303482696414, + "step": 470 + }, + { + "epoch": 0.25147347740667975, + "grad_norm": 0.2598288179745421, + "learning_rate": 6.521975967138322e-06, + "log_odds_chosen": 0.31320375204086304, + "log_odds_ratio": -0.671956479549408, + "logits/chosen": -0.251331627368927, + "logits/rejected": -0.25280171632766724, + "logps/chosen": -1.0279942750930786, + "logps/rejected": -1.2646806240081787, + "loss": 0.7066, + "nll_loss": 0.6571913361549377, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.01027994230389595, + "rewards/margins": 0.0023668636567890644, + "rewards/rejected": -0.012646806426346302, + "step": 480 + }, + { + "epoch": 0.25671250818598557, + "grad_norm": 0.24565992829114924, + "learning_rate": 6.4891651284487955e-06, + "log_odds_chosen": 0.0688483789563179, + "log_odds_ratio": -0.7365472912788391, + "logits/chosen": -0.27430278062820435, + "logits/rejected": -0.29375168681144714, + "logps/chosen": -1.028810977935791, + "logps/rejected": -1.071062684059143, + "loss": 0.7146, + "nll_loss": 0.7346883416175842, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010288110002875328, + "rewards/margins": 0.000422515906393528, + "rewards/rejected": -0.01071062684059143, + "step": 490 + }, + { + "epoch": 0.26195153896529144, + "grad_norm": 0.2353402772652972, + "learning_rate": 6.455353605781819e-06, + "log_odds_chosen": 0.10149893909692764, + "log_odds_ratio": -0.7544985413551331, + "logits/chosen": -0.31464990973472595, + "logits/rejected": -0.2927784025669098, + "logps/chosen": -1.0012602806091309, + "logps/rejected": -1.084865689277649, + "loss": 0.7382, + "nll_loss": 0.7470039129257202, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.010012601502239704, + "rewards/margins": 0.0008360546198673546, + "rewards/rejected": -0.010848656296730042, + "step": 500 + }, + { + "epoch": 0.26195153896529144, + "eval_log_odds_chosen": 0.2812057435512543, + "eval_log_odds_ratio": -0.6602885127067566, + "eval_logits/chosen": -0.2982476055622101, + "eval_logits/rejected": -0.30226191878318787, + "eval_logps/chosen": -0.9868198037147522, + "eval_logps/rejected": -1.1884440183639526, + "eval_loss": 0.6065535545349121, + "eval_nll_loss": 0.6005536913871765, + "eval_rewards/accuracies": 0.6119999885559082, + "eval_rewards/chosen": -0.009868198074400425, + "eval_rewards/margins": 0.0020162416622042656, + "eval_rewards/rejected": -0.01188443973660469, + "eval_runtime": 268.8732, + "eval_samples_per_second": 7.435, + "eval_steps_per_second": 0.465, + "step": 500 + }, + { + "epoch": 0.26719056974459726, + "grad_norm": 0.27957826390810075, + "learning_rate": 6.420552718234041e-06, + "log_odds_chosen": 0.2998487651348114, + "log_odds_ratio": -0.679768443107605, + "logits/chosen": -0.30905863642692566, + "logits/rejected": -0.32363763451576233, + "logps/chosen": -0.9874080419540405, + "logps/rejected": -1.2168712615966797, + "loss": 0.7884, + "nll_loss": 0.7535444498062134, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.009874081239104271, + "rewards/margins": 0.002294632140547037, + "rewards/rejected": -0.01216871291399002, + "step": 510 + }, + { + "epoch": 0.2724296005239031, + "grad_norm": 0.2531949493918585, + "learning_rate": 6.384774116112176e-06, + "log_odds_chosen": 0.24868044257164001, + "log_odds_ratio": -0.6564691066741943, + "logits/chosen": -0.2581290304660797, + "logits/rejected": -0.27538132667541504, + "logps/chosen": -0.9462459683418274, + "logps/rejected": -1.1043087244033813, + "loss": 0.7477, + "nll_loss": 0.7717889547348022, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.00946245901286602, + "rewards/margins": 0.0015806274022907019, + "rewards/rejected": -0.011043086647987366, + "step": 520 + }, + { + "epoch": 0.2776686313032089, + "grad_norm": 0.2457887466034346, + "learning_rate": 6.348029777032831e-06, + "log_odds_chosen": 0.16035327315330505, + "log_odds_ratio": -0.738000750541687, + "logits/chosen": -0.2864713668823242, + "logits/rejected": -0.2864418029785156, + "logps/chosen": -1.08033287525177, + "logps/rejected": -1.2031466960906982, + "loss": 0.7423, + "nll_loss": 0.7512191534042358, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.010803327895700932, + "rewards/margins": 0.0012281389208510518, + "rewards/rejected": -0.012031466700136662, + "step": 530 + }, + { + "epoch": 0.2829076620825147, + "grad_norm": 0.20086958833949786, + "learning_rate": 6.310332001912748e-06, + "log_odds_chosen": 0.21620038151741028, + "log_odds_ratio": -0.7137210369110107, + "logits/chosen": -0.273650199174881, + "logits/rejected": -0.2783169150352478, + "logps/chosen": -0.9874860644340515, + "logps/rejected": -1.1352880001068115, + "loss": 0.7143, + "nll_loss": 0.6813681721687317, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00987485982477665, + "rewards/margins": 0.0014780184719711542, + "rewards/rejected": -0.011352878995239735, + "step": 540 + }, + { + "epoch": 0.28814669286182054, + "grad_norm": 0.24303066124311232, + "learning_rate": 6.27169341085083e-06, + "log_odds_chosen": 0.22751787304878235, + "log_odds_ratio": -0.6631879210472107, + "logits/chosen": -0.27736982703208923, + "logits/rejected": -0.2964634299278259, + "logps/chosen": -0.9909104108810425, + "logps/rejected": -1.1427714824676514, + "loss": 0.729, + "nll_loss": 0.6827758550643921, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.00990910455584526, + "rewards/margins": 0.0015186098171398044, + "rewards/rejected": -0.011427713558077812, + "step": 550 + }, + { + "epoch": 0.2933857236411264, + "grad_norm": 0.25050011460137184, + "learning_rate": 6.232126938903292e-06, + "log_odds_chosen": 0.32530641555786133, + "log_odds_ratio": -0.6435776352882385, + "logits/chosen": -0.2863444983959198, + "logits/rejected": -0.2765114903450012, + "logps/chosen": -0.9766008257865906, + "logps/rejected": -1.2149832248687744, + "loss": 0.7096, + "nll_loss": 0.689509928226471, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.009766008704900742, + "rewards/margins": 0.0023838237393647432, + "rewards/rejected": -0.012149832211434841, + "step": 560 + }, + { + "epoch": 0.29862475442043224, + "grad_norm": 0.21891859191151827, + "learning_rate": 6.191645831753405e-06, + "log_odds_chosen": 0.38149961829185486, + "log_odds_ratio": -0.6493080854415894, + "logits/chosen": -0.2663540244102478, + "logits/rejected": -0.29600009322166443, + "logps/chosen": -1.027773380279541, + "logps/rejected": -1.317977786064148, + "loss": 0.7722, + "nll_loss": 0.8025256395339966, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.010277734138071537, + "rewards/margins": 0.0029020446818321943, + "rewards/rejected": -0.013179777190089226, + "step": 570 + }, + { + "epoch": 0.30386378519973806, + "grad_norm": 0.22254143966893244, + "learning_rate": 6.150263641277216e-06, + "log_odds_chosen": 0.3161838948726654, + "log_odds_ratio": -0.6515612602233887, + "logits/chosen": -0.24465902149677277, + "logits/rejected": -0.26124146580696106, + "logps/chosen": -0.9947541952133179, + "logps/rejected": -1.1926690340042114, + "loss": 0.7212, + "nll_loss": 0.7128957509994507, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.009947540238499641, + "rewards/margins": 0.0019791489467024803, + "rewards/rejected": -0.011926690116524696, + "step": 580 + }, + { + "epoch": 0.3091028159790439, + "grad_norm": 0.22077450440144003, + "learning_rate": 6.107994221006794e-06, + "log_odds_chosen": 0.31419774889945984, + "log_odds_ratio": -0.6574016213417053, + "logits/chosen": -0.22359177470207214, + "logits/rejected": -0.23112180829048157, + "logps/chosen": -0.9491780996322632, + "logps/rejected": -1.1917085647583008, + "loss": 0.6984, + "nll_loss": 0.6670723557472229, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.009491780772805214, + "rewards/margins": 0.0024253041483461857, + "rewards/rejected": -0.011917085386812687, + "step": 590 + }, + { + "epoch": 0.3143418467583497, + "grad_norm": 0.251407676620302, + "learning_rate": 6.064851721492469e-06, + "log_odds_chosen": 0.34587010741233826, + "log_odds_ratio": -0.6531810760498047, + "logits/chosen": -0.2380281388759613, + "logits/rejected": -0.2737070322036743, + "logps/chosen": -0.965602695941925, + "logps/rejected": -1.2065523862838745, + "loss": 0.7339, + "nll_loss": 0.7357583045959473, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.009656025096774101, + "rewards/margins": 0.00240949634462595, + "rewards/rejected": -0.012065522372722626, + "step": 600 + }, + { + "epoch": 0.3143418467583497, + "eval_log_odds_chosen": 0.28590938448905945, + "eval_log_odds_ratio": -0.6587470769882202, + "eval_logits/chosen": -0.24898304045200348, + "eval_logits/rejected": -0.2544224262237549, + "eval_logps/chosen": -0.9714497923851013, + "eval_logps/rejected": -1.1750952005386353, + "eval_loss": 0.6008986830711365, + "eval_nll_loss": 0.5948084592819214, + "eval_rewards/accuracies": 0.6100000143051147, + "eval_rewards/chosen": -0.009714496321976185, + "eval_rewards/margins": 0.0020364541560411453, + "eval_rewards/rejected": -0.011750951409339905, + "eval_runtime": 269.4545, + "eval_samples_per_second": 7.419, + "eval_steps_per_second": 0.464, + "step": 600 + }, + { + "epoch": 0.3195808775376555, + "grad_norm": 0.19175608334969543, + "learning_rate": 6.0208505855656546e-06, + "log_odds_chosen": 0.07260292023420334, + "log_odds_ratio": -0.7751378417015076, + "logits/chosen": -0.25565439462661743, + "logits/rejected": -0.2717348337173462, + "logps/chosen": -1.075731635093689, + "logps/rejected": -1.1411268711090088, + "loss": 0.7227, + "nll_loss": 0.7402042150497437, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -0.010757316835224628, + "rewards/margins": 0.0006539515452459455, + "rewards/rejected": -0.011411268264055252, + "step": 610 + }, + { + "epoch": 0.32481990831696134, + "grad_norm": 0.2578615657242334, + "learning_rate": 5.976005543503809e-06, + "log_odds_chosen": 0.1597108542919159, + "log_odds_ratio": -0.7176756858825684, + "logits/chosen": -0.25491657853126526, + "logits/rejected": -0.24470999836921692, + "logps/chosen": -0.9926729202270508, + "logps/rejected": -1.1365479230880737, + "loss": 0.6902, + "nll_loss": 0.69666588306427, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.009926728904247284, + "rewards/margins": 0.0014387497212737799, + "rewards/rejected": -0.011365478858351707, + "step": 620 + }, + { + "epoch": 0.3300589390962672, + "grad_norm": 0.2633633043041551, + "learning_rate": 5.930331608099176e-06, + "log_odds_chosen": 0.3888145685195923, + "log_odds_ratio": -0.6442294716835022, + "logits/chosen": -0.2764403223991394, + "logits/rejected": -0.287002295255661, + "logps/chosen": -0.9268707036972046, + "logps/rejected": -1.1958353519439697, + "loss": 0.7199, + "nll_loss": 0.7196196913719177, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.00926870759576559, + "rewards/margins": 0.0026896463241428137, + "rewards/rejected": -0.011958354152739048, + "step": 630 + }, + { + "epoch": 0.33529796987557303, + "grad_norm": 0.21089837015837296, + "learning_rate": 5.88384406963295e-06, + "log_odds_chosen": 0.146010160446167, + "log_odds_ratio": -0.7168859839439392, + "logits/chosen": -0.26726484298706055, + "logits/rejected": -0.25664839148521423, + "logps/chosen": -1.0535497665405273, + "logps/rejected": -1.1466712951660156, + "loss": 0.7069, + "nll_loss": 0.6863896250724792, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.010535497218370438, + "rewards/margins": 0.000931215938180685, + "rewards/rejected": -0.011466712690889835, + "step": 640 + }, + { + "epoch": 0.34053700065487885, + "grad_norm": 0.19092936543303227, + "learning_rate": 5.836558490756538e-06, + "log_odds_chosen": 0.15154734253883362, + "log_odds_ratio": -0.7028461694717407, + "logits/chosen": -0.3015301823616028, + "logits/rejected": -0.29784873127937317, + "logps/chosen": -1.0319565534591675, + "logps/rejected": -1.1262956857681274, + "loss": 0.6908, + "nll_loss": 0.714655876159668, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.010319565422832966, + "rewards/margins": 0.000943391933105886, + "rewards/rejected": -0.011262957938015461, + "step": 650 + }, + { + "epoch": 0.34577603143418467, + "grad_norm": 0.26193122829170745, + "learning_rate": 5.788490701281647e-06, + "log_odds_chosen": 0.16511467099189758, + "log_odds_ratio": -0.6980458498001099, + "logits/chosen": -0.26230692863464355, + "logits/rejected": -0.2723314166069031, + "logps/chosen": -1.024036169052124, + "logps/rejected": -1.1353824138641357, + "loss": 0.7383, + "nll_loss": 0.7427499890327454, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010240362025797367, + "rewards/margins": 0.001113462378270924, + "rewards/rejected": -0.011353823356330395, + "step": 660 + }, + { + "epoch": 0.3510150622134905, + "grad_norm": 0.23994917813654507, + "learning_rate": 5.739656792880934e-06, + "log_odds_chosen": 0.1396145075559616, + "log_odds_ratio": -0.7353970408439636, + "logits/chosen": -0.23678474128246307, + "logits/rejected": -0.22982017695903778, + "logps/chosen": -1.0073795318603516, + "logps/rejected": -1.132542371749878, + "loss": 0.7342, + "nll_loss": 0.7146428823471069, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.010073795914649963, + "rewards/margins": 0.0012516285059973598, + "rewards/rejected": -0.01132542360574007, + "step": 670 + }, + { + "epoch": 0.3562540929927963, + "grad_norm": 0.20221234021881487, + "learning_rate": 5.6900731137009834e-06, + "log_odds_chosen": 0.2808675765991211, + "log_odds_ratio": -0.6466277837753296, + "logits/chosen": -0.23304986953735352, + "logits/rejected": -0.24880293011665344, + "logps/chosen": -0.9442907571792603, + "logps/rejected": -1.1252390146255493, + "loss": 0.6538, + "nll_loss": 0.6462209224700928, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.009442906826734543, + "rewards/margins": 0.0018094830447807908, + "rewards/rejected": -0.011252389289438725, + "step": 680 + }, + { + "epoch": 0.3614931237721022, + "grad_norm": 0.21729386041077234, + "learning_rate": 5.639756262889441e-06, + "log_odds_chosen": 0.29118528962135315, + "log_odds_ratio": -0.6446735262870789, + "logits/chosen": -0.2575289011001587, + "logits/rejected": -0.2800835072994232, + "logps/chosen": -0.9227706789970398, + "logps/rejected": -1.119728684425354, + "loss": 0.7375, + "nll_loss": 0.7325758337974548, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.009227706119418144, + "rewards/margins": 0.0019695800729095936, + "rewards/rejected": -0.011197286657989025, + "step": 690 + }, + { + "epoch": 0.366732154551408, + "grad_norm": 0.24056294333924544, + "learning_rate": 5.588723085038102e-06, + "log_odds_chosen": 0.14757606387138367, + "log_odds_ratio": -0.7077519297599792, + "logits/chosen": -0.23453739285469055, + "logits/rejected": -0.24059641361236572, + "logps/chosen": -0.9751278758049011, + "logps/rejected": -1.0833594799041748, + "loss": 0.7133, + "nll_loss": 0.7068762183189392, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.009751277044415474, + "rewards/margins": 0.001082315924577415, + "rewards/rejected": -0.010833594016730785, + "step": 700 + }, + { + "epoch": 0.366732154551408, + "eval_log_odds_chosen": 0.2828400731086731, + "eval_log_odds_ratio": -0.6590429544448853, + "eval_logits/chosen": -0.2763667702674866, + "eval_logits/rejected": -0.2830006182193756, + "eval_logps/chosen": -0.95875483751297, + "eval_logps/rejected": -1.1589833498001099, + "eval_loss": 0.5967572331428528, + "eval_nll_loss": 0.5905880331993103, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.009587547741830349, + "eval_rewards/margins": 0.002002286957576871, + "eval_rewards/rejected": -0.011589834466576576, + "eval_runtime": 270.3826, + "eval_samples_per_second": 7.393, + "eval_steps_per_second": 0.462, + "step": 700 + }, + { + "epoch": 0.3719711853307138, + "grad_norm": 0.25075392464158525, + "learning_rate": 5.536990664543849e-06, + "log_odds_chosen": 0.25649288296699524, + "log_odds_ratio": -0.6813511252403259, + "logits/chosen": -0.26890888810157776, + "logits/rejected": -0.2766120135784149, + "logps/chosen": -0.9498542547225952, + "logps/rejected": -1.1336113214492798, + "loss": 0.7356, + "nll_loss": 0.7913224697113037, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.009498542174696922, + "rewards/margins": 0.0018375713843852282, + "rewards/rejected": -0.011336112394928932, + "step": 710 + }, + { + "epoch": 0.37721021611001965, + "grad_norm": 0.2560360768330474, + "learning_rate": 5.484576319889293e-06, + "log_odds_chosen": 0.35988372564315796, + "log_odds_ratio": -0.6421231031417847, + "logits/chosen": -0.2358725517988205, + "logits/rejected": -0.2683177888393402, + "logps/chosen": -0.9860288500785828, + "logps/rejected": -1.2582954168319702, + "loss": 0.6986, + "nll_loss": 0.7215532064437866, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.00986028928309679, + "rewards/margins": 0.0027226645033806562, + "rewards/rejected": -0.012582953087985516, + "step": 720 + }, + { + "epoch": 0.38244924688932547, + "grad_norm": 0.2389570992848936, + "learning_rate": 5.4314975978450645e-06, + "log_odds_chosen": 0.24539212882518768, + "log_odds_ratio": -0.666843831539154, + "logits/chosen": -0.23044565320014954, + "logits/rejected": -0.257163405418396, + "logps/chosen": -0.987472653388977, + "logps/rejected": -1.1630165576934814, + "loss": 0.724, + "nll_loss": 0.7047569751739502, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.009874725714325905, + "rewards/margins": 0.001755438162945211, + "rewards/rejected": -0.011630164459347725, + "step": 730 + }, + { + "epoch": 0.3876882776686313, + "grad_norm": 0.23349735339471042, + "learning_rate": 5.377772267595671e-06, + "log_odds_chosen": 0.1265036016702652, + "log_odds_ratio": -0.7426053881645203, + "logits/chosen": -0.22161659598350525, + "logits/rejected": -0.20148198306560516, + "logps/chosen": -0.987158477306366, + "logps/rejected": -1.084160566329956, + "loss": 0.6558, + "nll_loss": 0.6827282905578613, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.009871585294604301, + "rewards/margins": 0.0009700207156129181, + "rewards/rejected": -0.010841606184840202, + "step": 740 + }, + { + "epoch": 0.3929273084479371, + "grad_norm": 0.24740744827014474, + "learning_rate": 5.323418314790902e-06, + "log_odds_chosen": 0.180901437997818, + "log_odds_ratio": -0.6792001724243164, + "logits/chosen": -0.22566553950309753, + "logits/rejected": -0.2513706088066101, + "logps/chosen": -1.010834813117981, + "logps/rejected": -1.1412070989608765, + "loss": 0.7466, + "nll_loss": 0.7286490201950073, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.010108347050845623, + "rewards/margins": 0.001303724362514913, + "rewards/rejected": -0.01141207106411457, + "step": 750 + }, + { + "epoch": 0.398166339227243, + "grad_norm": 0.24398794037042804, + "learning_rate": 5.268453935524767e-06, + "log_odds_chosen": 0.27628079056739807, + "log_odds_ratio": -0.6668368577957153, + "logits/chosen": -0.23829717934131622, + "logits/rejected": -0.26499801874160767, + "logps/chosen": -1.0213868618011475, + "logps/rejected": -1.2219724655151367, + "loss": 0.6748, + "nll_loss": 0.6636958122253418, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.010213869623839855, + "rewards/margins": 0.0020058555528521538, + "rewards/rejected": -0.012219725176692009, + "step": 760 + }, + { + "epoch": 0.4034053700065488, + "grad_norm": 0.23509527899222843, + "learning_rate": 5.212897530243978e-06, + "log_odds_chosen": 0.29617372155189514, + "log_odds_ratio": -0.6592302322387695, + "logits/chosen": -0.2393266260623932, + "logits/rejected": -0.2519669830799103, + "logps/chosen": -0.9821060299873352, + "logps/rejected": -1.1928421258926392, + "loss": 0.7028, + "nll_loss": 0.7175225019454956, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.009821060113608837, + "rewards/margins": 0.002107360865920782, + "rewards/rejected": -0.011928422376513481, + "step": 770 + }, + { + "epoch": 0.4086444007858546, + "grad_norm": 0.23394947702424512, + "learning_rate": 5.156767697588029e-06, + "log_odds_chosen": 0.3425753116607666, + "log_odds_ratio": -0.6615421772003174, + "logits/chosen": -0.2409038543701172, + "logits/rejected": -0.2574775815010071, + "logps/chosen": -0.9479767084121704, + "logps/rejected": -1.1898690462112427, + "loss": 0.6941, + "nll_loss": 0.657213568687439, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.0094797657802701, + "rewards/margins": 0.002418924355879426, + "rewards/rejected": -0.011898690834641457, + "step": 780 + }, + { + "epoch": 0.41388343156516044, + "grad_norm": 0.21833827164567227, + "learning_rate": 5.100083228162918e-06, + "log_odds_chosen": 0.24498343467712402, + "log_odds_ratio": -0.6762871742248535, + "logits/chosen": -0.275887668132782, + "logits/rejected": -0.30585306882858276, + "logps/chosen": -0.9832944869995117, + "logps/rejected": -1.1611313819885254, + "loss": 0.7435, + "nll_loss": 0.7558133006095886, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.00983294378966093, + "rewards/margins": 0.001778370002284646, + "rewards/rejected": -0.011611313559114933, + "step": 790 + }, + { + "epoch": 0.41912246234446626, + "grad_norm": 0.23630257510340968, + "learning_rate": 5.042863098250613e-06, + "log_odds_chosen": 0.3097684979438782, + "log_odds_ratio": -0.669011652469635, + "logits/chosen": -0.2526930868625641, + "logits/rejected": -0.2613184154033661, + "logps/chosen": -0.975333034992218, + "logps/rejected": -1.203438401222229, + "loss": 0.6988, + "nll_loss": 0.6705261468887329, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.009753329679369926, + "rewards/margins": 0.002281052526086569, + "rewards/rejected": -0.012034382671117783, + "step": 800 + }, + { + "epoch": 0.41912246234446626, + "eval_log_odds_chosen": 0.2897730767726898, + "eval_log_odds_ratio": -0.6575716137886047, + "eval_logits/chosen": -0.27449238300323486, + "eval_logits/rejected": -0.28172942996025085, + "eval_logps/chosen": -0.9450584053993225, + "eval_logps/rejected": -1.1490689516067505, + "eval_loss": 0.5925648212432861, + "eval_nll_loss": 0.5863717198371887, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.009450582787394524, + "eval_rewards/margins": 0.0020401068031787872, + "eval_rewards/rejected": -0.011490690521895885, + "eval_runtime": 282.9062, + "eval_samples_per_second": 7.066, + "eval_steps_per_second": 0.442, + "step": 800 + }, + { + "epoch": 0.4243614931237721, + "grad_norm": 0.2488306854414885, + "learning_rate": 4.98512646345635e-06, + "log_odds_chosen": 0.28641897439956665, + "log_odds_ratio": -0.6547825336456299, + "logits/chosen": -0.22206516563892365, + "logits/rejected": -0.258362352848053, + "logps/chosen": -0.9418247938156128, + "logps/rejected": -1.1379072666168213, + "loss": 0.7665, + "nll_loss": 0.7691252827644348, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.009418248198926449, + "rewards/margins": 0.0019608228467404842, + "rewards/rejected": -0.011379070580005646, + "step": 810 + }, + { + "epoch": 0.42960052390307796, + "grad_norm": 0.23185879725371805, + "learning_rate": 4.92689265229591e-06, + "log_odds_chosen": 0.13744059205055237, + "log_odds_ratio": -0.7185770273208618, + "logits/chosen": -0.2039022445678711, + "logits/rejected": -0.19484642148017883, + "logps/chosen": -0.8942493200302124, + "logps/rejected": -0.9868205189704895, + "loss": 0.6985, + "nll_loss": 0.6817600727081299, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.008942494168877602, + "rewards/margins": 0.0009257116471417248, + "rewards/rejected": -0.009868205524981022, + "step": 820 + }, + { + "epoch": 0.4348395546823838, + "grad_norm": 0.26066946910165784, + "learning_rate": 4.8681811597249986e-06, + "log_odds_chosen": 0.31899920105934143, + "log_odds_ratio": -0.6595107316970825, + "logits/chosen": -0.2524524927139282, + "logits/rejected": -0.26533645391464233, + "logps/chosen": -0.9564129114151001, + "logps/rejected": -1.1792023181915283, + "loss": 0.7289, + "nll_loss": 0.736221194267273, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.009564128704369068, + "rewards/margins": 0.0022278937976807356, + "rewards/rejected": -0.011792022734880447, + "step": 830 + }, + { + "epoch": 0.4400785854616896, + "grad_norm": 0.2781863509449919, + "learning_rate": 4.80901164061291e-06, + "log_odds_chosen": 0.2759079933166504, + "log_odds_ratio": -0.6593595743179321, + "logits/chosen": -0.24136073887348175, + "logits/rejected": -0.24197664856910706, + "logps/chosen": -0.949256420135498, + "logps/rejected": -1.1427079439163208, + "loss": 0.7372, + "nll_loss": 0.6998056173324585, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.009492563083767891, + "rewards/margins": 0.001934514963068068, + "rewards/rejected": -0.011427078396081924, + "step": 840 + }, + { + "epoch": 0.4453176162409954, + "grad_norm": 0.24998305046902874, + "learning_rate": 4.7494039031626685e-06, + "log_odds_chosen": 0.20654296875, + "log_odds_ratio": -0.6940165162086487, + "logits/chosen": -0.23618969321250916, + "logits/rejected": -0.26105183362960815, + "logps/chosen": -0.949593186378479, + "logps/rejected": -1.0937670469284058, + "loss": 0.7206, + "nll_loss": 0.7645503282546997, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0094959307461977, + "rewards/margins": 0.0014417401980608702, + "rewards/rejected": -0.01093767024576664, + "step": 850 + }, + { + "epoch": 0.45055664702030124, + "grad_norm": 0.20308732647543407, + "learning_rate": 4.689377902279818e-06, + "log_odds_chosen": 0.12388608604669571, + "log_odds_ratio": -0.7273297309875488, + "logits/chosen": -0.2292974442243576, + "logits/rejected": -0.22320961952209473, + "logps/chosen": -1.0063588619232178, + "logps/rejected": -1.0858452320098877, + "loss": 0.7091, + "nll_loss": 0.7223267555236816, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.010063587687909603, + "rewards/margins": 0.0007948653656058013, + "rewards/rejected": -0.010858452878892422, + "step": 860 + }, + { + "epoch": 0.45579567779960706, + "grad_norm": 0.3144056888989078, + "learning_rate": 4.628953732892118e-06, + "log_odds_chosen": 0.3134177327156067, + "log_odds_ratio": -0.6562970876693726, + "logits/chosen": -0.23619429767131805, + "logits/rejected": -0.25046223402023315, + "logps/chosen": -0.9373693466186523, + "logps/rejected": -1.1769232749938965, + "loss": 0.7338, + "nll_loss": 0.7179974913597107, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.009373693726956844, + "rewards/margins": 0.0023955402430146933, + "rewards/rejected": -0.011769233271479607, + "step": 870 + }, + { + "epoch": 0.4610347085789129, + "grad_norm": 0.20617728810621005, + "learning_rate": 4.568151623222352e-06, + "log_odds_chosen": 0.2670097351074219, + "log_odds_ratio": -0.6971144676208496, + "logits/chosen": -0.22974228858947754, + "logits/rejected": -0.25060245394706726, + "logps/chosen": -0.9138998985290527, + "logps/rejected": -1.1040019989013672, + "loss": 0.7077, + "nll_loss": 0.6301047801971436, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.009138999506831169, + "rewards/margins": 0.001901020877994597, + "rewards/rejected": -0.011040019802749157, + "step": 880 + }, + { + "epoch": 0.46627373935821875, + "grad_norm": 0.2652827403078759, + "learning_rate": 4.50699192801652e-06, + "log_odds_chosen": 0.3137222230434418, + "log_odds_ratio": -0.6815677285194397, + "logits/chosen": -0.26133638620376587, + "logits/rejected": -0.28535357117652893, + "logps/chosen": -0.9835060238838196, + "logps/rejected": -1.2045724391937256, + "loss": 0.7136, + "nll_loss": 0.7360855937004089, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.009835059754550457, + "rewards/margins": 0.0022106640972197056, + "rewards/rejected": -0.01204572431743145, + "step": 890 + }, + { + "epoch": 0.4715127701375246, + "grad_norm": 0.3207317475817683, + "learning_rate": 4.445495121729673e-06, + "log_odds_chosen": 0.2501332759857178, + "log_odds_ratio": -0.6778159141540527, + "logits/chosen": -0.24674773216247559, + "logits/rejected": -0.25447121262550354, + "logps/chosen": -0.9550365209579468, + "logps/rejected": -1.1225982904434204, + "loss": 0.7493, + "nll_loss": 0.70607590675354, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.009550364688038826, + "rewards/margins": 0.0016756162513047457, + "rewards/rejected": -0.011225981637835503, + "step": 900 + }, + { + "epoch": 0.4715127701375246, + "eval_log_odds_chosen": 0.2952045500278473, + "eval_log_odds_ratio": -0.6552044749259949, + "eval_logits/chosen": -0.2476220428943634, + "eval_logits/rejected": -0.2546870708465576, + "eval_logps/chosen": -0.9300667643547058, + "eval_logps/rejected": -1.1357324123382568, + "eval_loss": 0.5882026553153992, + "eval_nll_loss": 0.5820150971412659, + "eval_rewards/accuracies": 0.6079999804496765, + "eval_rewards/chosen": -0.009300666861236095, + "eval_rewards/margins": 0.002056657336652279, + "eval_rewards/rejected": -0.0113573232665658, + "eval_runtime": 272.7809, + "eval_samples_per_second": 7.328, + "eval_steps_per_second": 0.458, + "step": 900 + }, + { + "epoch": 0.4767518009168304, + "grad_norm": 0.23100558629186949, + "learning_rate": 4.3836817916716655e-06, + "log_odds_chosen": 0.33713942766189575, + "log_odds_ratio": -0.6442294120788574, + "logits/chosen": -0.25310784578323364, + "logits/rejected": -0.24791720509529114, + "logps/chosen": -0.913447380065918, + "logps/rejected": -1.1374478340148926, + "loss": 0.7021, + "nll_loss": 0.7241432666778564, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.009134473279118538, + "rewards/margins": 0.002240004250779748, + "rewards/rejected": -0.011374477297067642, + "step": 910 + }, + { + "epoch": 0.4819908316961362, + "grad_norm": 0.27311828272573985, + "learning_rate": 4.3215726311151454e-06, + "log_odds_chosen": 0.31479746103286743, + "log_odds_ratio": -0.6612164378166199, + "logits/chosen": -0.22212114930152893, + "logits/rejected": -0.23604507744312286, + "logps/chosen": -0.9464074969291687, + "logps/rejected": -1.160541296005249, + "loss": 0.7189, + "nll_loss": 0.7034357190132141, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.009464074857532978, + "rewards/margins": 0.0021413397043943405, + "rewards/rejected": -0.011605414561927319, + "step": 920 + }, + { + "epoch": 0.48722986247544203, + "grad_norm": 0.2251073682511537, + "learning_rate": 4.259188432368047e-06, + "log_odds_chosen": 0.2062271535396576, + "log_odds_ratio": -0.712655782699585, + "logits/chosen": -0.22355704009532928, + "logits/rejected": -0.21783855557441711, + "logps/chosen": -0.929416835308075, + "logps/rejected": -1.0706582069396973, + "loss": 0.7098, + "nll_loss": 0.6941349506378174, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.009294168092310429, + "rewards/margins": 0.0014124134322628379, + "rewards/rejected": -0.010706582106649876, + "step": 930 + }, + { + "epoch": 0.49246889325474785, + "grad_norm": 0.2896044869891697, + "learning_rate": 4.196550079812947e-06, + "log_odds_chosen": 0.14942023158073425, + "log_odds_ratio": -0.7291346788406372, + "logits/chosen": -0.20475438237190247, + "logits/rejected": -0.21312706172466278, + "logps/chosen": -0.9276836514472961, + "logps/rejected": -1.035669207572937, + "loss": 0.673, + "nll_loss": 0.6321808099746704, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.009276836179196835, + "rewards/margins": 0.0010798568837344646, + "rewards/rejected": -0.010356692597270012, + "step": 940 + }, + { + "epoch": 0.4977079240340537, + "grad_norm": 0.20608437650972947, + "learning_rate": 4.133678542915596e-06, + "log_odds_chosen": 0.21366234123706818, + "log_odds_ratio": -0.7176351547241211, + "logits/chosen": -0.1843259632587433, + "logits/rejected": -0.19231656193733215, + "logps/chosen": -0.9370969533920288, + "logps/rejected": -1.0868377685546875, + "loss": 0.7357, + "nll_loss": 0.689740777015686, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.009370969608426094, + "rewards/margins": 0.001497406861744821, + "rewards/rejected": -0.01086837612092495, + "step": 950 + }, + { + "epoch": 0.5029469548133595, + "grad_norm": 0.20440659461177407, + "learning_rate": 4.070594869204954e-06, + "log_odds_chosen": 0.31676673889160156, + "log_odds_ratio": -0.653446614742279, + "logits/chosen": -0.2184230387210846, + "logits/rejected": -0.232182115316391, + "logps/chosen": -0.8973399996757507, + "logps/rejected": -1.1070184707641602, + "loss": 0.6963, + "nll_loss": 0.6926812529563904, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.008973398245871067, + "rewards/margins": 0.0020967856980860233, + "rewards/rejected": -0.011070184409618378, + "step": 960 + }, + { + "epoch": 0.5081859855926654, + "grad_norm": 0.23363856927616253, + "learning_rate": 4.007320177227106e-06, + "log_odds_chosen": 0.18765881657600403, + "log_odds_ratio": -0.7033464908599854, + "logits/chosen": -0.22513191401958466, + "logits/rejected": -0.2241486757993698, + "logps/chosen": -0.9618891477584839, + "logps/rejected": -1.090888261795044, + "loss": 0.7067, + "nll_loss": 0.6825166940689087, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.009618892334401608, + "rewards/margins": 0.001289989217184484, + "rewards/rejected": -0.010908881202340126, + "step": 970 + }, + { + "epoch": 0.5134250163719711, + "grad_norm": 0.22410878238276724, + "learning_rate": 3.943875649475397e-06, + "log_odds_chosen": 0.2351769208908081, + "log_odds_ratio": -0.6929172277450562, + "logits/chosen": -0.23800428211688995, + "logits/rejected": -0.22524037957191467, + "logps/chosen": -0.9266209602355957, + "logps/rejected": -1.0913342237472534, + "loss": 0.7062, + "nll_loss": 0.7125518918037415, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.009266209788620472, + "rewards/margins": 0.0016471326816827059, + "rewards/rejected": -0.010913342237472534, + "step": 980 + }, + { + "epoch": 0.518664047151277, + "grad_norm": 0.2601248391316997, + "learning_rate": 3.880282525299161e-06, + "log_odds_chosen": 0.26979511976242065, + "log_odds_ratio": -0.6584133505821228, + "logits/chosen": -0.2896724343299866, + "logits/rejected": -0.27878767251968384, + "logps/chosen": -0.9138110280036926, + "logps/rejected": -1.0914112329483032, + "loss": 0.7303, + "nll_loss": 0.7162944078445435, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.009138109162449837, + "rewards/margins": 0.0017760014161467552, + "rewards/rejected": -0.010914110578596592, + "step": 990 + }, + { + "epoch": 0.5239030779305829, + "grad_norm": 0.2886836538722393, + "learning_rate": 3.816562093793414e-06, + "log_odds_chosen": 0.33312270045280457, + "log_odds_ratio": -0.675898551940918, + "logits/chosen": -0.23547939956188202, + "logits/rejected": -0.27134275436401367, + "logps/chosen": -0.9631227254867554, + "logps/rejected": -1.1864663362503052, + "loss": 0.7022, + "nll_loss": 0.701551079750061, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.00963122770190239, + "rewards/margins": 0.002233435632660985, + "rewards/rejected": -0.011864664033055305, + "step": 1000 + }, + { + "epoch": 0.5239030779305829, + "eval_log_odds_chosen": 0.2961971163749695, + "eval_log_odds_ratio": -0.6568813323974609, + "eval_logits/chosen": -0.25139424204826355, + "eval_logits/rejected": -0.25877639651298523, + "eval_logps/chosen": -0.9089908599853516, + "eval_logps/rejected": -1.110971450805664, + "eval_loss": 0.5842349529266357, + "eval_nll_loss": 0.5780314803123474, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.009089908562600613, + "eval_rewards/margins": 0.0020198060665279627, + "eval_rewards/rejected": -0.011109714396297932, + "eval_runtime": 268.1596, + "eval_samples_per_second": 7.455, + "eval_steps_per_second": 0.466, + "step": 1000 + }, + { + "epoch": 0.5291421087098886, + "grad_norm": 0.2409996471560838, + "learning_rate": 3.7527356866718955e-06, + "log_odds_chosen": 0.16977646946907043, + "log_odds_ratio": -0.718795120716095, + "logits/chosen": -0.234476238489151, + "logits/rejected": -0.24295465648174286, + "logps/chosen": -1.0260999202728271, + "logps/rejected": -1.149409532546997, + "loss": 0.7043, + "nll_loss": 0.6979072093963623, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.01026100106537342, + "rewards/margins": 0.0012330941390246153, + "rewards/rejected": -0.011494094505906105, + "step": 1010 + }, + { + "epoch": 0.5343811394891945, + "grad_norm": 0.24739341420079547, + "learning_rate": 3.6888246711258453e-06, + "log_odds_chosen": 0.36255502700805664, + "log_odds_ratio": -0.6197658777236938, + "logits/chosen": -0.24051399528980255, + "logits/rejected": -0.26482734084129333, + "logps/chosen": -0.875298798084259, + "logps/rejected": -1.1210336685180664, + "loss": 0.7153, + "nll_loss": 0.7133646607398987, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.008752988651394844, + "rewards/margins": 0.0024573481641709805, + "rewards/rejected": -0.011210335418581963, + "step": 1020 + }, + { + "epoch": 0.5396201702685003, + "grad_norm": 0.3154631649103451, + "learning_rate": 3.6248504426708986e-06, + "log_odds_chosen": 0.21956510841846466, + "log_odds_ratio": -0.693530261516571, + "logits/chosen": -0.2142527848482132, + "logits/rejected": -0.2322002351284027, + "logps/chosen": -1.004662275314331, + "logps/rejected": -1.155106782913208, + "loss": 0.7213, + "nll_loss": 0.7184774875640869, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.010046622715890408, + "rewards/margins": 0.0015044447500258684, + "rewards/rejected": -0.011551067233085632, + "step": 1030 + }, + { + "epoch": 0.5448592010478062, + "grad_norm": 0.3984212728138088, + "learning_rate": 3.5608344179844997e-06, + "log_odds_chosen": 0.07040030509233475, + "log_odds_ratio": -0.7465513348579407, + "logits/chosen": -0.26399117708206177, + "logits/rejected": -0.26066404581069946, + "logps/chosen": -0.9947193264961243, + "logps/rejected": -1.038916826248169, + "loss": 0.7643, + "nll_loss": 0.8070164918899536, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.009947193786501884, + "rewards/margins": 0.0004419738834258169, + "rewards/rejected": -0.010389168746769428, + "step": 1040 + }, + { + "epoch": 0.550098231827112, + "grad_norm": 0.24683028263597967, + "learning_rate": 3.4967980277362333e-06, + "log_odds_chosen": 0.26563113927841187, + "log_odds_ratio": -0.6936607956886292, + "logits/chosen": -0.24129600822925568, + "logits/rejected": -0.2225707471370697, + "logps/chosen": -0.9990348815917969, + "logps/rejected": -1.1718101501464844, + "loss": 0.6967, + "nll_loss": 0.6925816535949707, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.009990348480641842, + "rewards/margins": 0.001727753086015582, + "rewards/rejected": -0.011718102730810642, + "step": 1050 + }, + { + "epoch": 0.5553372626064178, + "grad_norm": 0.25916876482479656, + "learning_rate": 3.4327627094134725e-06, + "log_odds_chosen": 0.23857775330543518, + "log_odds_ratio": -0.6982980966567993, + "logits/chosen": -0.2231340855360031, + "logits/rejected": -0.23151478171348572, + "logps/chosen": -0.9416376352310181, + "logps/rejected": -1.1105958223342896, + "loss": 0.6977, + "nll_loss": 0.7219172716140747, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.009416376240551472, + "rewards/margins": 0.0016895814333111048, + "rewards/rejected": -0.011105956509709358, + "step": 1060 + }, + { + "epoch": 0.5605762933857237, + "grad_norm": 0.32629059742912764, + "learning_rate": 3.3687499001447395e-06, + "log_odds_chosen": 0.36377382278442383, + "log_odds_ratio": -0.6207367777824402, + "logits/chosen": -0.22005310654640198, + "logits/rejected": -0.2187117338180542, + "logps/chosen": -0.8868287801742554, + "logps/rejected": -1.1314175128936768, + "loss": 0.687, + "nll_loss": 0.6751260757446289, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.00886828824877739, + "rewards/margins": 0.0024458863772451878, + "rewards/rejected": -0.01131417416036129, + "step": 1070 + }, + { + "epoch": 0.5658153241650294, + "grad_norm": 0.29711247258862283, + "learning_rate": 3.304781029523195e-06, + "log_odds_chosen": 0.1744251698255539, + "log_odds_ratio": -0.7341504096984863, + "logits/chosen": -0.22395114600658417, + "logits/rejected": -0.2257882058620453, + "logps/chosen": -0.9004520177841187, + "logps/rejected": -1.040974497795105, + "loss": 0.6746, + "nll_loss": 0.6490055322647095, + "rewards/accuracies": 0.4937500059604645, + "rewards/chosen": -0.009004520252346992, + "rewards/margins": 0.0014052249025553465, + "rewards/rejected": -0.010409745387732983, + "step": 1080 + }, + { + "epoch": 0.5710543549443353, + "grad_norm": 0.22026906097892585, + "learning_rate": 3.240877512432638e-06, + "log_odds_chosen": 0.20723943412303925, + "log_odds_ratio": -0.7219654321670532, + "logits/chosen": -0.2135605365037918, + "logits/rejected": -0.2091299295425415, + "logps/chosen": -0.956895649433136, + "logps/rejected": -1.086503267288208, + "loss": 0.6855, + "nll_loss": 0.683186411857605, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.00956895761191845, + "rewards/margins": 0.0012960756430402398, + "rewards/rejected": -0.01086503267288208, + "step": 1090 + }, + { + "epoch": 0.5762933857236411, + "grad_norm": 0.22524737228540384, + "learning_rate": 3.1770607418784433e-06, + "log_odds_chosen": 0.25248509645462036, + "log_odds_ratio": -0.6578890085220337, + "logits/chosen": -0.2398686707019806, + "logits/rejected": -0.23859818279743195, + "logps/chosen": -0.9096572995185852, + "logps/rejected": -1.0772696733474731, + "loss": 0.6805, + "nll_loss": 0.6755146980285645, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.009096571244299412, + "rewards/margins": 0.0016761248698458076, + "rewards/rejected": -0.010772697627544403, + "step": 1100 + }, + { + "epoch": 0.5762933857236411, + "eval_log_odds_chosen": 0.2937109172344208, + "eval_log_odds_ratio": -0.660809338092804, + "eval_logits/chosen": -0.25185319781303406, + "eval_logits/rejected": -0.25904712080955505, + "eval_logps/chosen": -0.8865421414375305, + "eval_logps/rejected": -1.0833343267440796, + "eval_loss": 0.5807305574417114, + "eval_nll_loss": 0.5744020342826843, + "eval_rewards/accuracies": 0.6019999980926514, + "eval_rewards/chosen": -0.008865421637892723, + "eval_rewards/margins": 0.0019679218530654907, + "eval_rewards/rejected": -0.010833343490958214, + "eval_runtime": 271.1106, + "eval_samples_per_second": 7.373, + "eval_steps_per_second": 0.461, + "step": 1100 + }, + { + "epoch": 0.581532416502947, + "grad_norm": 0.45255186528770547, + "learning_rate": 3.1133520818258116e-06, + "log_odds_chosen": 0.4233662188053131, + "log_odds_ratio": -0.6200209856033325, + "logits/chosen": -0.24288010597229004, + "logits/rejected": -0.2747390866279602, + "logps/chosen": -0.8419440388679504, + "logps/rejected": -1.1101503372192383, + "loss": 0.7, + "nll_loss": 0.7085453271865845, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.008419441059231758, + "rewards/margins": 0.00268206256441772, + "rewards/rejected": -0.011101502925157547, + "step": 1110 + }, + { + "epoch": 0.5867714472822528, + "grad_norm": 0.2554458264274049, + "learning_rate": 3.0497728600477488e-06, + "log_odds_chosen": 0.2578720152378082, + "log_odds_ratio": -0.6819779276847839, + "logits/chosen": -0.226557657122612, + "logits/rejected": -0.2220630645751953, + "logps/chosen": -0.9298788905143738, + "logps/rejected": -1.101123571395874, + "loss": 0.7333, + "nll_loss": 0.6786555051803589, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.00929878931492567, + "rewards/margins": 0.0017124470323324203, + "rewards/rejected": -0.011011235415935516, + "step": 1120 + }, + { + "epoch": 0.5920104780615586, + "grad_norm": 0.26482758285065394, + "learning_rate": 2.986344360985162e-06, + "log_odds_chosen": 0.36092090606689453, + "log_odds_ratio": -0.6440589427947998, + "logits/chosen": -0.17206324636936188, + "logits/rejected": -0.19123123586177826, + "logps/chosen": -0.8286620378494263, + "logps/rejected": -1.0395228862762451, + "loss": 0.6597, + "nll_loss": 0.6818104982376099, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.008286619558930397, + "rewards/margins": 0.002108608838170767, + "rewards/rejected": -0.010395227000117302, + "step": 1130 + }, + { + "epoch": 0.5972495088408645, + "grad_norm": 0.24208228363944706, + "learning_rate": 2.923087818621452e-06, + "log_odds_chosen": 0.17570583522319794, + "log_odds_ratio": -0.7043822407722473, + "logits/chosen": -0.26782792806625366, + "logits/rejected": -0.26422086358070374, + "logps/chosen": -1.0224881172180176, + "logps/rejected": -1.1496175527572632, + "loss": 0.6784, + "nll_loss": 0.6882492899894714, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.01022487971931696, + "rewards/margins": 0.0012712948955595493, + "rewards/rejected": -0.011496175080537796, + "step": 1140 + }, + { + "epoch": 0.6024885396201702, + "grad_norm": 0.22840937868876304, + "learning_rate": 2.860024409374013e-06, + "log_odds_chosen": 0.19850441813468933, + "log_odds_ratio": -0.7063683271408081, + "logits/chosen": -0.21049292385578156, + "logits/rejected": -0.2100718915462494, + "logps/chosen": -0.9241889119148254, + "logps/rejected": -1.0648218393325806, + "loss": 0.7062, + "nll_loss": 0.676911473274231, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.009241889230906963, + "rewards/margins": 0.0014063273556530476, + "rewards/rejected": -0.010648216120898724, + "step": 1150 + }, + { + "epoch": 0.6077275703994761, + "grad_norm": 0.2640680842240368, + "learning_rate": 2.797175245004986e-06, + "log_odds_chosen": 0.26555079221725464, + "log_odds_ratio": -0.6697388887405396, + "logits/chosen": -0.2078789472579956, + "logits/rejected": -0.2241998165845871, + "logps/chosen": -0.9275274276733398, + "logps/rejected": -1.1062742471694946, + "loss": 0.7186, + "nll_loss": 0.7219773530960083, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.009275272488594055, + "rewards/margins": 0.0017874684417620301, + "rewards/rejected": -0.011062742210924625, + "step": 1160 + }, + { + "epoch": 0.6129666011787819, + "grad_norm": 0.24081971015397918, + "learning_rate": 2.734561365553671e-06, + "log_odds_chosen": 0.22908750176429749, + "log_odds_ratio": -0.7178616523742676, + "logits/chosen": -0.25099799036979675, + "logits/rejected": -0.24473123252391815, + "logps/chosen": -0.9188436269760132, + "logps/rejected": -1.0757328271865845, + "loss": 0.6875, + "nll_loss": 0.7033326625823975, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.009188435971736908, + "rewards/margins": 0.0015688911080360413, + "rewards/rejected": -0.01075732707977295, + "step": 1170 + }, + { + "epoch": 0.6182056319580878, + "grad_norm": 0.21690804460738192, + "learning_rate": 2.6722037322929485e-06, + "log_odds_chosen": 0.25051847100257874, + "log_odds_ratio": -0.6795316934585571, + "logits/chosen": -0.22288396954536438, + "logits/rejected": -0.2265399992465973, + "logps/chosen": -0.8900327682495117, + "logps/rejected": -1.0318130254745483, + "loss": 0.7016, + "nll_loss": 0.7159923911094666, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.008900327607989311, + "rewards/margins": 0.0014178026467561722, + "rewards/rejected": -0.010318130254745483, + "step": 1180 + }, + { + "epoch": 0.6234446627373936, + "grad_norm": 0.2480515943450666, + "learning_rate": 2.6101232207120546e-06, + "log_odds_chosen": 0.25469428300857544, + "log_odds_ratio": -0.7002100944519043, + "logits/chosen": -0.24988976120948792, + "logits/rejected": -0.25235018134117126, + "logps/chosen": -0.8989761471748352, + "logps/rejected": -1.0557796955108643, + "loss": 0.7201, + "nll_loss": 0.718826174736023, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.008989760652184486, + "rewards/margins": 0.0015680358046665788, + "rewards/rejected": -0.010557797737419605, + "step": 1190 + }, + { + "epoch": 0.6286836935166994, + "grad_norm": 0.34122416682838863, + "learning_rate": 2.5483406135281005e-06, + "log_odds_chosen": 0.41061514616012573, + "log_odds_ratio": -0.6271175146102905, + "logits/chosen": -0.20977671444416046, + "logits/rejected": -0.22667160630226135, + "logps/chosen": -0.7675566673278809, + "logps/rejected": -1.0119469165802002, + "loss": 0.6427, + "nll_loss": 0.6016801595687866, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.007675566710531712, + "rewards/margins": 0.002443903125822544, + "rewards/rejected": -0.010119469836354256, + "step": 1200 + }, + { + "epoch": 0.6286836935166994, + "eval_log_odds_chosen": 0.3024108111858368, + "eval_log_odds_ratio": -0.6608767509460449, + "eval_logits/chosen": -0.24303622543811798, + "eval_logits/rejected": -0.2482856959104538, + "eval_logps/chosen": -0.8682465553283691, + "eval_logps/rejected": -1.0670232772827148, + "eval_loss": 0.5780009031295776, + "eval_nll_loss": 0.5716609954833984, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.008682465180754662, + "eval_rewards/margins": 0.0019877671729773283, + "eval_rewards/rejected": -0.010670232586562634, + "eval_runtime": 278.9619, + "eval_samples_per_second": 7.166, + "eval_steps_per_second": 0.448, + "step": 1200 + }, + { + "epoch": 0.6339227242960053, + "grad_norm": 0.22680090574543296, + "learning_rate": 2.486876593728619e-06, + "log_odds_chosen": 0.3060600161552429, + "log_odds_ratio": -0.6400117874145508, + "logits/chosen": -0.20497791469097137, + "logits/rejected": -0.21911552548408508, + "logps/chosen": -0.8201937675476074, + "logps/rejected": -1.0039427280426025, + "loss": 0.6677, + "nll_loss": 0.6524402499198914, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.00820193625986576, + "rewards/margins": 0.0018374897772446275, + "rewards/rejected": -0.010039427317678928, + "step": 1210 + }, + { + "epoch": 0.639161755075311, + "grad_norm": 0.18697079192797328, + "learning_rate": 2.4257517376475235e-06, + "log_odds_chosen": 0.24433453381061554, + "log_odds_ratio": -0.7043715715408325, + "logits/chosen": -0.21600952744483948, + "logits/rejected": -0.21840915083885193, + "logps/chosen": -0.8748706579208374, + "logps/rejected": -1.038516879081726, + "loss": 0.689, + "nll_loss": 0.7082723379135132, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.008748706430196762, + "rewards/margins": 0.0016364629846066236, + "rewards/rejected": -0.010385168716311455, + "step": 1220 + }, + { + "epoch": 0.6444007858546169, + "grad_norm": 0.26043966163192633, + "learning_rate": 2.3649865080767573e-06, + "log_odds_chosen": 0.39842092990875244, + "log_odds_ratio": -0.6394789218902588, + "logits/chosen": -0.17968204617500305, + "logits/rejected": -0.22999629378318787, + "logps/chosen": -0.8687442541122437, + "logps/rejected": -1.1317379474639893, + "loss": 0.691, + "nll_loss": 0.6364887952804565, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.008687443099915981, + "rewards/margins": 0.0026299364399164915, + "rewards/rejected": -0.011317379772663116, + "step": 1230 + }, + { + "epoch": 0.6496398166339227, + "grad_norm": 0.2934035468939971, + "learning_rate": 2.3046012474159536e-06, + "log_odds_chosen": 0.18857654929161072, + "log_odds_ratio": -0.7218400835990906, + "logits/chosen": -0.22426645457744598, + "logits/rejected": -0.2606600522994995, + "logps/chosen": -0.8423234820365906, + "logps/rejected": -0.9672554135322571, + "loss": 0.6626, + "nll_loss": 0.6410557627677917, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.008423235267400742, + "rewards/margins": 0.0012493181275203824, + "rewards/rejected": -0.009672552347183228, + "step": 1240 + }, + { + "epoch": 0.6548788474132285, + "grad_norm": 0.2750280322287747, + "learning_rate": 2.2446161708624088e-06, + "log_odds_chosen": 0.35305264592170715, + "log_odds_ratio": -0.6322815418243408, + "logits/chosen": -0.22593221068382263, + "logits/rejected": -0.23385939002037048, + "logps/chosen": -0.8886737823486328, + "logps/rejected": -1.1158584356307983, + "loss": 0.7241, + "nll_loss": 0.7064876556396484, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.008886737748980522, + "rewards/margins": 0.0022718466352671385, + "rewards/rejected": -0.011158584617078304, + "step": 1250 + }, + { + "epoch": 0.6601178781925344, + "grad_norm": 0.2850170024015937, + "learning_rate": 2.1850513596436247e-06, + "log_odds_chosen": 0.2020442932844162, + "log_odds_ratio": -0.7057245373725891, + "logits/chosen": -0.2613026201725006, + "logits/rejected": -0.2628094553947449, + "logps/chosen": -0.8931059837341309, + "logps/rejected": -1.0006712675094604, + "loss": 0.6932, + "nll_loss": 0.6863256692886353, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.008931060321629047, + "rewards/margins": 0.001075651729479432, + "rewards/rejected": -0.010006711818277836, + "step": 1260 + }, + { + "epoch": 0.6653569089718402, + "grad_norm": 0.2334203645902276, + "learning_rate": 2.1259267542947185e-06, + "log_odds_chosen": 0.32196345925331116, + "log_odds_ratio": -0.6869245767593384, + "logits/chosen": -0.22752761840820312, + "logits/rejected": -0.22912946343421936, + "logps/chosen": -0.8861078023910522, + "logps/rejected": -1.070960283279419, + "loss": 0.6958, + "nll_loss": 0.7246706485748291, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.008861077949404716, + "rewards/margins": 0.0018485232722014189, + "rewards/rejected": -0.010709600523114204, + "step": 1270 + }, + { + "epoch": 0.6705959397511461, + "grad_norm": 0.2937841890124048, + "learning_rate": 2.067262147982912e-06, + "log_odds_chosen": 0.32801932096481323, + "log_odds_ratio": -0.6607939004898071, + "logits/chosen": -0.24211068451404572, + "logits/rejected": -0.2737501263618469, + "logps/chosen": -0.8753350377082825, + "logps/rejected": -1.0936840772628784, + "loss": 0.673, + "nll_loss": 0.7191804051399231, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.008753350004553795, + "rewards/margins": 0.002183489967137575, + "rewards/rejected": -0.010936839506030083, + "step": 1280 + }, + { + "epoch": 0.6758349705304518, + "grad_norm": 0.28448016420781497, + "learning_rate": 2.009077179881372e-06, + "log_odds_chosen": 0.17785023152828217, + "log_odds_ratio": -0.6940667033195496, + "logits/chosen": -0.20315225422382355, + "logits/rejected": -0.21491765975952148, + "logps/chosen": -0.9090517163276672, + "logps/rejected": -1.0145957469940186, + "loss": 0.7026, + "nll_loss": 0.6749101281166077, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.009090515784919262, + "rewards/margins": 0.0010554410982877016, + "rewards/rejected": -0.01014595665037632, + "step": 1290 + }, + { + "epoch": 0.6810740013097577, + "grad_norm": 0.24264184898863173, + "learning_rate": 1.9513913285945946e-06, + "log_odds_chosen": 0.42332401871681213, + "log_odds_ratio": -0.6192356944084167, + "logits/chosen": -0.1945888102054596, + "logits/rejected": -0.22977054119110107, + "logps/chosen": -0.8285413980484009, + "logps/rejected": -1.0920394659042358, + "loss": 0.6762, + "nll_loss": 0.6468032598495483, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.00828541349619627, + "rewards/margins": 0.0026349793188273907, + "rewards/rejected": -0.010920394212007523, + "step": 1300 + }, + { + "epoch": 0.6810740013097577, + "eval_log_odds_chosen": 0.3069436550140381, + "eval_log_odds_ratio": -0.6618441939353943, + "eval_logits/chosen": -0.23222360014915466, + "eval_logits/rejected": -0.23756533861160278, + "eval_logps/chosen": -0.8586292266845703, + "eval_logps/rejected": -1.0576375722885132, + "eval_loss": 0.5761923789978027, + "eval_nll_loss": 0.5697891712188721, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.008586292155086994, + "eval_rewards/margins": 0.0019900836050510406, + "eval_rewards/rejected": -0.01057637482881546, + "eval_runtime": 268.7533, + "eval_samples_per_second": 7.438, + "eval_steps_per_second": 0.465, + "step": 1300 + }, + { + "epoch": 0.6863130320890635, + "grad_norm": 0.22307221268147773, + "learning_rate": 1.8942239056375397e-06, + "log_odds_chosen": 0.18992213904857635, + "log_odds_ratio": -0.702189564704895, + "logits/chosen": -0.2199142426252365, + "logits/rejected": -0.20674149692058563, + "logps/chosen": -0.8908072710037231, + "logps/rejected": -1.0369397401809692, + "loss": 0.6973, + "nll_loss": 0.6958785057067871, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.008908072486519814, + "rewards/margins": 0.0014613252133131027, + "rewards/rejected": -0.010369397699832916, + "step": 1310 + }, + { + "epoch": 0.6915520628683693, + "grad_norm": 0.30978725818000513, + "learning_rate": 1.837594048970723e-06, + "log_odds_chosen": 0.14045199751853943, + "log_odds_ratio": -0.7219871878623962, + "logits/chosen": -0.2725422978401184, + "logits/rejected": -0.26409873366355896, + "logps/chosen": -0.9043842554092407, + "logps/rejected": -0.9981764554977417, + "loss": 0.6863, + "nll_loss": 0.7046749591827393, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.009043841622769833, + "rewards/margins": 0.0009379230323247612, + "rewards/rejected": -0.009981764480471611, + "step": 1320 + }, + { + "epoch": 0.6967910936476752, + "grad_norm": 0.22502876667437588, + "learning_rate": 1.7815207165933726e-06, + "log_odds_chosen": 0.24389496445655823, + "log_odds_ratio": -0.7243469953536987, + "logits/chosen": -0.22347286343574524, + "logits/rejected": -0.2305651158094406, + "logps/chosen": -0.8554804921150208, + "logps/rejected": -1.0290186405181885, + "loss": 0.6949, + "nll_loss": 0.7154419422149658, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.008554804138839245, + "rewards/margins": 0.0017353817820549011, + "rewards/rejected": -0.010290185920894146, + "step": 1330 + }, + { + "epoch": 0.702030124426981, + "grad_norm": 0.3089322594713111, + "learning_rate": 1.7260226801968695e-06, + "log_odds_chosen": 0.23613600432872772, + "log_odds_ratio": -0.691783607006073, + "logits/chosen": -0.21107229590415955, + "logits/rejected": -0.21636977791786194, + "logps/chosen": -0.9278246164321899, + "logps/rejected": -1.0566743612289429, + "loss": 0.6692, + "nll_loss": 0.6606216430664062, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.009278246201574802, + "rewards/margins": 0.0012884981697425246, + "rewards/rejected": -0.010566744022071362, + "step": 1340 + }, + { + "epoch": 0.7072691552062869, + "grad_norm": 0.2668190498643392, + "learning_rate": 1.671118518880532e-06, + "log_odds_chosen": 0.189827561378479, + "log_odds_ratio": -0.7072278261184692, + "logits/chosen": -0.2188723087310791, + "logits/rejected": -0.21698541939258575, + "logps/chosen": -0.896720290184021, + "logps/rejected": -1.0248229503631592, + "loss": 0.6859, + "nll_loss": 0.6656599640846252, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.008967203088104725, + "rewards/margins": 0.0012810255866497755, + "rewards/rejected": -0.010248229838907719, + "step": 1350 + }, + { + "epoch": 0.7125081859855926, + "grad_norm": 0.24259424337870872, + "learning_rate": 1.6168266129318865e-06, + "log_odds_chosen": 0.16265830397605896, + "log_odds_ratio": -0.7222410440444946, + "logits/chosen": -0.20469431579113007, + "logits/rejected": -0.21427664160728455, + "logps/chosen": -0.8708987236022949, + "logps/rejected": -0.9569934010505676, + "loss": 0.704, + "nll_loss": 0.6836594343185425, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.008708987385034561, + "rewards/margins": 0.0008609470096416771, + "rewards/rejected": -0.009569934569299221, + "step": 1360 + }, + { + "epoch": 0.7177472167648985, + "grad_norm": 0.2256184064459049, + "learning_rate": 1.5631651376734926e-06, + "log_odds_chosen": 0.22786390781402588, + "log_odds_ratio": -0.711846649646759, + "logits/chosen": -0.22860285639762878, + "logits/rejected": -0.24218401312828064, + "logps/chosen": -0.8583256602287292, + "logps/rejected": -0.9923291206359863, + "loss": 0.7127, + "nll_loss": 0.6810920238494873, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.008583256043493748, + "rewards/margins": 0.0013400347670540214, + "rewards/rejected": -0.009923290461301804, + "step": 1370 + }, + { + "epoch": 0.7229862475442044, + "grad_norm": 0.23503577068826886, + "learning_rate": 1.5101520573783751e-06, + "log_odds_chosen": 0.3741925358772278, + "log_odds_ratio": -0.6353830695152283, + "logits/chosen": -0.21928901970386505, + "logits/rejected": -0.24703797698020935, + "logps/chosen": -0.8672024011611938, + "logps/rejected": -1.1037095785140991, + "loss": 0.7124, + "nll_loss": 0.6939696073532104, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.008672022260725498, + "rewards/margins": 0.0023650729563087225, + "rewards/rejected": -0.011037096381187439, + "step": 1380 + }, + { + "epoch": 0.7282252783235101, + "grad_norm": 0.23280207331329258, + "learning_rate": 1.4578051192561342e-06, + "log_odds_chosen": 0.1830010563135147, + "log_odds_ratio": -0.7071625590324402, + "logits/chosen": -0.21185937523841858, + "logits/rejected": -0.20238959789276123, + "logps/chosen": -0.8515766263008118, + "logps/rejected": -0.9668426513671875, + "loss": 0.7134, + "nll_loss": 0.6686742901802063, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.008515766821801662, + "rewards/margins": 0.0011526598827913404, + "rewards/rejected": -0.009668425656855106, + "step": 1390 + }, + { + "epoch": 0.733464309102816, + "grad_norm": 0.2380029203431237, + "learning_rate": 1.4061418475116842e-06, + "log_odds_chosen": 0.39564546942710876, + "log_odds_ratio": -0.6270996332168579, + "logits/chosen": -0.24804405868053436, + "logits/rejected": -0.2720070779323578, + "logps/chosen": -0.8107814788818359, + "logps/rejected": -1.06538724899292, + "loss": 0.6944, + "nll_loss": 0.7024892568588257, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.008107814006507397, + "rewards/margins": 0.002546058502048254, + "rewards/rejected": -0.010653872042894363, + "step": 1400 + }, + { + "epoch": 0.733464309102816, + "eval_log_odds_chosen": 0.3101637065410614, + "eval_log_odds_ratio": -0.6609058976173401, + "eval_logits/chosen": -0.24197958409786224, + "eval_logits/rejected": -0.24680981040000916, + "eval_logps/chosen": -0.8542194962501526, + "eval_logps/rejected": -1.0547674894332886, + "eval_loss": 0.575017511844635, + "eval_nll_loss": 0.5686248540878296, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.008542194031178951, + "eval_rewards/margins": 0.0020054795313626528, + "eval_rewards/rejected": -0.010547674261033535, + "eval_runtime": 281.1827, + "eval_samples_per_second": 7.109, + "eval_steps_per_second": 0.445, + "step": 1400 + }, + { + "epoch": 0.7387033398821218, + "grad_norm": 0.2669416945991939, + "learning_rate": 1.3551795374786858e-06, + "log_odds_chosen": 0.25648033618927, + "log_odds_ratio": -0.6926220059394836, + "logits/chosen": -0.2435269057750702, + "logits/rejected": -0.2436356544494629, + "logps/chosen": -0.8806196451187134, + "logps/rejected": -1.0599586963653564, + "loss": 0.7048, + "nll_loss": 0.696427583694458, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.008806196041405201, + "rewards/margins": 0.0017933888593688607, + "rewards/rejected": -0.010599585250020027, + "step": 1410 + }, + { + "epoch": 0.7439423706614277, + "grad_norm": 0.24647473183287075, + "learning_rate": 1.3049352498295716e-06, + "log_odds_chosen": 0.38035115599632263, + "log_odds_ratio": -0.6361268162727356, + "logits/chosen": -0.24278739094734192, + "logits/rejected": -0.26014792919158936, + "logps/chosen": -0.8281903266906738, + "logps/rejected": -1.0844037532806396, + "loss": 0.6639, + "nll_loss": 0.6452184915542603, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.008281903341412544, + "rewards/margins": 0.0025621347595006227, + "rewards/rejected": -0.010844036936759949, + "step": 1420 + }, + { + "epoch": 0.7491814014407334, + "grad_norm": 0.3041558655657045, + "learning_rate": 1.2554258048641397e-06, + "log_odds_chosen": 0.19218070805072784, + "log_odds_ratio": -0.7099407911300659, + "logits/chosen": -0.2104649543762207, + "logits/rejected": -0.24154922366142273, + "logps/chosen": -0.8334420919418335, + "logps/rejected": -0.9641669988632202, + "loss": 0.6748, + "nll_loss": 0.6133008599281311, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.008334420621395111, + "rewards/margins": 0.0013072482543066144, + "rewards/rejected": -0.009641669690608978, + "step": 1430 + }, + { + "epoch": 0.7544204322200393, + "grad_norm": 0.25179642126636953, + "learning_rate": 1.2066677768786188e-06, + "log_odds_chosen": 0.2702215313911438, + "log_odds_ratio": -0.6887364387512207, + "logits/chosen": -0.2444891482591629, + "logits/rejected": -0.24194936454296112, + "logps/chosen": -0.9080901145935059, + "logps/rejected": -1.1055560111999512, + "loss": 0.6932, + "nll_loss": 0.6903983354568481, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.009080900810658932, + "rewards/margins": 0.001974658574908972, + "rewards/rejected": -0.01105555985122919, + "step": 1440 + }, + { + "epoch": 0.7596594629993452, + "grad_norm": 0.2566284168448171, + "learning_rate": 1.1586774886170772e-06, + "log_odds_chosen": 0.30707067251205444, + "log_odds_ratio": -0.6694291234016418, + "logits/chosen": -0.28426066040992737, + "logits/rejected": -0.26999443769454956, + "logps/chosen": -0.8766347765922546, + "logps/rejected": -1.0652177333831787, + "loss": 0.7163, + "nll_loss": 0.7289090752601624, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.008766347542405128, + "rewards/margins": 0.0018858297262340784, + "rewards/rejected": -0.010652177035808563, + "step": 1450 + }, + { + "epoch": 0.7648984937786509, + "grad_norm": 0.19726939440168126, + "learning_rate": 1.1114710058070592e-06, + "log_odds_chosen": 0.09831424057483673, + "log_odds_ratio": -0.7557646632194519, + "logits/chosen": -0.19436194002628326, + "logits/rejected": -0.19044212996959686, + "logps/chosen": -0.885438084602356, + "logps/rejected": -0.9487984776496887, + "loss": 0.6404, + "nll_loss": 0.634408712387085, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.008854379877448082, + "rewards/margins": 0.0006336040096357465, + "rewards/rejected": -0.009487984701991081, + "step": 1460 + }, + { + "epoch": 0.7701375245579568, + "grad_norm": 0.2560415712825397, + "learning_rate": 1.065064131781252e-06, + "log_odds_chosen": 0.13219106197357178, + "log_odds_ratio": -0.7492179870605469, + "logits/chosen": -0.22603929042816162, + "logits/rejected": -0.24617178738117218, + "logps/chosen": -0.9043794870376587, + "logps/rejected": -1.0165117979049683, + "loss": 0.7111, + "nll_loss": 0.6886984705924988, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.009043793193995953, + "rewards/margins": 0.0011213240213692188, + "rewards/rejected": -0.010165116749703884, + "step": 1470 + }, + { + "epoch": 0.7753765553372626, + "grad_norm": 0.2940452908279725, + "learning_rate": 1.0194724021869967e-06, + "log_odds_chosen": 0.26290208101272583, + "log_odds_ratio": -0.6962881684303284, + "logits/chosen": -0.23025016486644745, + "logits/rejected": -0.26146119832992554, + "logps/chosen": -0.889056384563446, + "logps/rejected": -1.077383279800415, + "loss": 0.7356, + "nll_loss": 0.7125190496444702, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.008890563622117043, + "rewards/margins": 0.0018832674250006676, + "rewards/rejected": -0.010773831978440285, + "step": 1480 + }, + { + "epoch": 0.7806155861165684, + "grad_norm": 0.30243858167930426, + "learning_rate": 9.747110797854164e-07, + "log_odds_chosen": 0.21796353161334991, + "log_odds_ratio": -0.7083435654640198, + "logits/chosen": -0.24902808666229248, + "logits/rejected": -0.2640915513038635, + "logps/chosen": -0.8834966421127319, + "logps/rejected": -1.0319344997406006, + "loss": 0.7424, + "nll_loss": 0.7267500162124634, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.008834966458380222, + "rewards/margins": 0.0014843789394944906, + "rewards/rejected": -0.010319346562027931, + "step": 1490 + }, + { + "epoch": 0.7858546168958742, + "grad_norm": 0.28561425999576384, + "learning_rate": 9.307951493418893e-07, + "log_odds_chosen": 0.43342700600624084, + "log_odds_ratio": -0.6266660690307617, + "logits/chosen": -0.20827969908714294, + "logits/rejected": -0.22024419903755188, + "logps/chosen": -0.8941072225570679, + "logps/rejected": -1.1734715700149536, + "loss": 0.6695, + "nll_loss": 0.6944609880447388, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.008941072039306164, + "rewards/margins": 0.002793641993775964, + "rewards/rejected": -0.01173471286892891, + "step": 1500 + }, + { + "epoch": 0.7858546168958742, + "eval_log_odds_chosen": 0.3134806752204895, + "eval_log_odds_ratio": -0.661631166934967, + "eval_logits/chosen": -0.2371899038553238, + "eval_logits/rejected": -0.2425604611635208, + "eval_logps/chosen": -0.8493260145187378, + "eval_logps/rejected": -1.0505434274673462, + "eval_loss": 0.5742121338844299, + "eval_nll_loss": 0.5677821040153503, + "eval_rewards/accuracies": 0.6079999804496765, + "eval_rewards/chosen": -0.008493260480463505, + "eval_rewards/margins": 0.002012175042182207, + "eval_rewards/rejected": -0.01050543412566185, + "eval_runtime": 270.8065, + "eval_samples_per_second": 7.382, + "eval_steps_per_second": 0.462, + "step": 1500 + }, + { + "epoch": 0.7910936476751801, + "grad_norm": 0.2254797688672913, + "learning_rate": 8.877393126096055e-07, + "log_odds_chosen": 0.24689963459968567, + "log_odds_ratio": -0.6949166059494019, + "logits/chosen": -0.2658199369907379, + "logits/rejected": -0.2810281813144684, + "logps/chosen": -0.8999541997909546, + "logps/rejected": -1.086625099182129, + "loss": 0.7423, + "nll_loss": 0.7465766668319702, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008999543264508247, + "rewards/margins": 0.0018667096737772226, + "rewards/rejected": -0.010866251774132252, + "step": 1510 + }, + { + "epoch": 0.796332678454486, + "grad_norm": 0.24924822034612845, + "learning_rate": 8.455579834078397e-07, + "log_odds_chosen": 0.3433853089809418, + "log_odds_ratio": -0.6635347604751587, + "logits/chosen": -0.18545587360858917, + "logits/rejected": -0.200174480676651, + "logps/chosen": -0.8561753034591675, + "logps/rejected": -1.0463998317718506, + "loss": 0.6971, + "nll_loss": 0.6929630041122437, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.008561753667891026, + "rewards/margins": 0.0019022446358576417, + "rewards/rejected": -0.010463997721672058, + "step": 1520 + }, + { + "epoch": 0.8015717092337917, + "grad_norm": 0.2718986137429928, + "learning_rate": 8.042652827966437e-07, + "log_odds_chosen": 0.26018717885017395, + "log_odds_ratio": -0.6947474479675293, + "logits/chosen": -0.25021594762802124, + "logits/rejected": -0.24818949401378632, + "logps/chosen": -0.8872531652450562, + "logps/rejected": -1.0701402425765991, + "loss": 0.7235, + "nll_loss": 0.7240532636642456, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.008872531354427338, + "rewards/margins": 0.0018288707360625267, + "rewards/rejected": -0.010701403021812439, + "step": 1530 + }, + { + "epoch": 0.8068107400130976, + "grad_norm": 0.5108343233985189, + "learning_rate": 7.638750343495277e-07, + "log_odds_chosen": 0.3117789924144745, + "log_odds_ratio": -0.6702481508255005, + "logits/chosen": -0.23931124806404114, + "logits/rejected": -0.22990107536315918, + "logps/chosen": -0.8592001795768738, + "logps/rejected": -1.0367764234542847, + "loss": 0.6604, + "nll_loss": 0.6802663803100586, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.008592001162469387, + "rewards/margins": 0.00177576276473701, + "rewards/rejected": -0.01036776602268219, + "step": 1540 + }, + { + "epoch": 0.8120497707924034, + "grad_norm": 0.2781111416927778, + "learning_rate": 7.244007595257382e-07, + "log_odds_chosen": 0.27534064650535583, + "log_odds_ratio": -0.6695243716239929, + "logits/chosen": -0.20855948328971863, + "logits/rejected": -0.23724588751792908, + "logps/chosen": -0.8493406176567078, + "logps/rejected": -1.0317304134368896, + "loss": 0.6768, + "nll_loss": 0.6526767015457153, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.00849340669810772, + "rewards/margins": 0.0018238987540826201, + "rewards/rejected": -0.010317305102944374, + "step": 1550 + }, + { + "epoch": 0.8172888015717092, + "grad_norm": 0.3133123324918134, + "learning_rate": 6.858556731436754e-07, + "log_odds_chosen": 0.15951837599277496, + "log_odds_ratio": -0.7142313718795776, + "logits/chosen": -0.24038231372833252, + "logits/rejected": -0.23249582946300507, + "logps/chosen": -0.9233297109603882, + "logps/rejected": -1.008540391921997, + "loss": 0.6989, + "nll_loss": 0.7119780778884888, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.009233297780156136, + "rewards/margins": 0.0008521073614247143, + "rewards/rejected": -0.010085404850542545, + "step": 1560 + }, + { + "epoch": 0.822527832351015, + "grad_norm": 0.42996736238352995, + "learning_rate": 6.482526789569585e-07, + "log_odds_chosen": 0.2984713315963745, + "log_odds_ratio": -0.6555167436599731, + "logits/chosen": -0.23409931361675262, + "logits/rejected": -0.26614493131637573, + "logps/chosen": -0.8079819679260254, + "logps/rejected": -0.9996023178100586, + "loss": 0.6759, + "nll_loss": 0.6624296307563782, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.00807981938123703, + "rewards/margins": 0.001916204346343875, + "rewards/rejected": -0.009996023960411549, + "step": 1570 + }, + { + "epoch": 0.8277668631303209, + "grad_norm": 0.23537449504928848, + "learning_rate": 6.116043653346403e-07, + "log_odds_chosen": 0.38262271881103516, + "log_odds_ratio": -0.6627165675163269, + "logits/chosen": -0.22528938949108124, + "logits/rejected": -0.23289379477500916, + "logps/chosen": -0.903620719909668, + "logps/rejected": -1.1372332572937012, + "loss": 0.6809, + "nll_loss": 0.6387246251106262, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.009036206640303135, + "rewards/margins": 0.0023361260537058115, + "rewards/rejected": -0.011372332461178303, + "step": 1580 + }, + { + "epoch": 0.8330058939096268, + "grad_norm": 0.24050810385940227, + "learning_rate": 5.759230010469826e-07, + "log_odds_chosen": 0.11879537254571915, + "log_odds_ratio": -0.7360481023788452, + "logits/chosen": -0.23608234524726868, + "logits/rejected": -0.2152477204799652, + "logps/chosen": -0.8976501226425171, + "logps/rejected": -0.9821032285690308, + "loss": 0.6742, + "nll_loss": 0.6351466178894043, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.00897650234401226, + "rewards/margins": 0.0008445307612419128, + "rewards/rejected": -0.009821033105254173, + "step": 1590 + }, + { + "epoch": 0.8382449246889325, + "grad_norm": 0.2736382551857292, + "learning_rate": 5.412205311582433e-07, + "log_odds_chosen": 0.26674309372901917, + "log_odds_ratio": -0.7051304578781128, + "logits/chosen": -0.23347100615501404, + "logits/rejected": -0.24622151255607605, + "logps/chosen": -0.8361061811447144, + "logps/rejected": -1.024967908859253, + "loss": 0.7258, + "nll_loss": 0.7080037593841553, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.008361062034964561, + "rewards/margins": 0.0018886181060224771, + "rewards/rejected": -0.010249679908156395, + "step": 1600 + }, + { + "epoch": 0.8382449246889325, + "eval_log_odds_chosen": 0.3139868974685669, + "eval_log_odds_ratio": -0.6618570685386658, + "eval_logits/chosen": -0.23709820210933685, + "eval_logits/rejected": -0.241834819316864, + "eval_logps/chosen": -0.8485015034675598, + "eval_logps/rejected": -1.0496830940246582, + "eval_loss": 0.5737613439559937, + "eval_nll_loss": 0.5673460960388184, + "eval_rewards/accuracies": 0.6079999804496765, + "eval_rewards/chosen": -0.00848501455038786, + "eval_rewards/margins": 0.00201181648299098, + "eval_rewards/rejected": -0.010496831499040127, + "eval_runtime": 278.0418, + "eval_samples_per_second": 7.19, + "eval_steps_per_second": 0.45, + "step": 1600 + }, + { + "epoch": 0.8434839554682384, + "grad_norm": 0.2461154279813755, + "learning_rate": 5.075085730278202e-07, + "log_odds_chosen": 0.11390231549739838, + "log_odds_ratio": -0.7587698101997375, + "logits/chosen": -0.2537403702735901, + "logits/rejected": -0.26043227314949036, + "logps/chosen": -0.933973491191864, + "logps/rejected": -1.0090564489364624, + "loss": 0.6737, + "nll_loss": 0.6847957372665405, + "rewards/accuracies": 0.46875, + "rewards/chosen": -0.00933973491191864, + "rewards/margins": 0.000750830746255815, + "rewards/rejected": -0.01009056530892849, + "step": 1610 + }, + { + "epoch": 0.8487229862475442, + "grad_norm": 0.293725880646676, + "learning_rate": 4.747984124211031e-07, + "log_odds_chosen": 0.4481363296508789, + "log_odds_ratio": -0.6022178530693054, + "logits/chosen": -0.26498937606811523, + "logits/rejected": -0.29027503728866577, + "logps/chosen": -0.82249516248703, + "logps/rejected": -1.0860894918441772, + "loss": 0.682, + "nll_loss": 0.6935005784034729, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.008224951103329659, + "rewards/margins": 0.0026359441690146923, + "rewards/rejected": -0.010860895738005638, + "step": 1620 + }, + { + "epoch": 0.85396201702685, + "grad_norm": 0.28181497622786605, + "learning_rate": 4.4310099973133324e-07, + "log_odds_chosen": 0.1358368694782257, + "log_odds_ratio": -0.7588969469070435, + "logits/chosen": -0.25816676020622253, + "logits/rejected": -0.2658771574497223, + "logps/chosen": -0.9440135955810547, + "logps/rejected": -1.0287643671035767, + "loss": 0.7223, + "nll_loss": 0.7669018507003784, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.009440136142075062, + "rewards/margins": 0.0008475076174363494, + "rewards/rejected": -0.010287643410265446, + "step": 1630 + }, + { + "epoch": 0.8592010478061559, + "grad_norm": 0.24767085551640736, + "learning_rate": 4.124269463137341e-07, + "log_odds_chosen": 0.24949102103710175, + "log_odds_ratio": -0.6826614141464233, + "logits/chosen": -0.24843844771385193, + "logits/rejected": -0.2676991820335388, + "logps/chosen": -0.8724035024642944, + "logps/rejected": -1.0047967433929443, + "loss": 0.6895, + "nll_loss": 0.6859319806098938, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.008724034763872623, + "rewards/margins": 0.001323932665400207, + "rewards/rejected": -0.010047967545688152, + "step": 1640 + }, + { + "epoch": 0.8644400785854617, + "grad_norm": 0.273095979053061, + "learning_rate": 3.8278652093315045e-07, + "log_odds_chosen": 0.21245428919792175, + "log_odds_ratio": -0.6998537182807922, + "logits/chosen": -0.2235928326845169, + "logits/rejected": -0.2198958396911621, + "logps/chosen": -0.9110556840896606, + "logps/rejected": -1.014875888824463, + "loss": 0.6751, + "nll_loss": 0.6564816236495972, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.009110555984079838, + "rewards/margins": 0.0010382026666775346, + "rewards/rejected": -0.010148759000003338, + "step": 1650 + }, + { + "epoch": 0.8696791093647676, + "grad_norm": 0.21686983060047896, + "learning_rate": 3.5418964632636075e-07, + "log_odds_chosen": 0.3318633735179901, + "log_odds_ratio": -0.6623369455337524, + "logits/chosen": -0.24393992125988007, + "logits/rejected": -0.25989559292793274, + "logps/chosen": -0.8131970167160034, + "logps/rejected": -1.0291340351104736, + "loss": 0.6765, + "nll_loss": 0.627708375453949, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.008131968788802624, + "rewards/margins": 0.002159371506422758, + "rewards/rejected": -0.01029134076088667, + "step": 1660 + }, + { + "epoch": 0.8749181401440733, + "grad_norm": 0.23331375473591626, + "learning_rate": 3.266458958802463e-07, + "log_odds_chosen": 0.23905089497566223, + "log_odds_ratio": -0.7230226397514343, + "logits/chosen": -0.20989327132701874, + "logits/rejected": -0.2193128615617752, + "logps/chosen": -0.8955384492874146, + "logps/rejected": -1.0794498920440674, + "loss": 0.7067, + "nll_loss": 0.7193494439125061, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.00895538367331028, + "rewards/margins": 0.0018391149351373315, + "rewards/rejected": -0.010794498957693577, + "step": 1670 + }, + { + "epoch": 0.8801571709233792, + "grad_norm": 0.2388493711074595, + "learning_rate": 3.0016449042690057e-07, + "log_odds_chosen": 0.3502407968044281, + "log_odds_ratio": -0.6578859686851501, + "logits/chosen": -0.2545422613620758, + "logits/rejected": -0.24275808036327362, + "logps/chosen": -0.868048369884491, + "logps/rejected": -1.0792254209518433, + "loss": 0.6822, + "nll_loss": 0.7034357190132141, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.008680484257638454, + "rewards/margins": 0.0021117704454809427, + "rewards/rejected": -0.010792254470288754, + "step": 1680 + }, + { + "epoch": 0.885396201702685, + "grad_norm": 0.21583687175178848, + "learning_rate": 2.747542951567702e-07, + "log_odds_chosen": 0.2685350179672241, + "log_odds_ratio": -0.7099257707595825, + "logits/chosen": -0.2754712998867035, + "logits/rejected": -0.27395230531692505, + "logps/chosen": -0.8825618624687195, + "logps/rejected": -1.0439611673355103, + "loss": 0.736, + "nll_loss": 0.7420133948326111, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.008825617842376232, + "rewards/margins": 0.001613991567865014, + "rewards/rejected": -0.010439610108733177, + "step": 1690 + }, + { + "epoch": 0.8906352324819908, + "grad_norm": 0.2625664453357368, + "learning_rate": 2.5042381665084907e-07, + "log_odds_chosen": 0.2786557972431183, + "log_odds_ratio": -0.674875020980835, + "logits/chosen": -0.25832805037498474, + "logits/rejected": -0.2771572470664978, + "logps/chosen": -0.8848080635070801, + "logps/rejected": -1.0470314025878906, + "loss": 0.7193, + "nll_loss": 0.7683790326118469, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.008848080411553383, + "rewards/margins": 0.001622233772650361, + "rewards/rejected": -0.0104703139513731, + "step": 1700 + }, + { + "epoch": 0.8906352324819908, + "eval_log_odds_chosen": 0.3162248134613037, + "eval_log_odds_ratio": -0.6610245108604431, + "eval_logits/chosen": -0.2351589947938919, + "eval_logits/rejected": -0.24033093452453613, + "eval_logps/chosen": -0.8476623296737671, + "eval_logps/rejected": -1.0499019622802734, + "eval_loss": 0.5735238194465637, + "eval_nll_loss": 0.5670892596244812, + "eval_rewards/accuracies": 0.6050000190734863, + "eval_rewards/chosen": -0.008476623333990574, + "eval_rewards/margins": 0.0020223965402692556, + "eval_rewards/rejected": -0.010499019175767899, + "eval_runtime": 282.3911, + "eval_samples_per_second": 7.079, + "eval_steps_per_second": 0.443, + "step": 1700 + }, + { + "epoch": 0.8958742632612967, + "grad_norm": 0.2201709218551474, + "learning_rate": 2.2718120003292786e-07, + "log_odds_chosen": 0.1792387217283249, + "log_odds_ratio": -0.7307632565498352, + "logits/chosen": -0.2693052291870117, + "logits/rejected": -0.24535951018333435, + "logps/chosen": -0.9198406338691711, + "logps/rejected": -1.0340744256973267, + "loss": 0.6955, + "nll_loss": 0.6777786016464233, + "rewards/accuracies": 0.5062500238418579, + "rewards/chosen": -0.009198406711220741, + "rewards/margins": 0.0011423381511121988, + "rewards/rejected": -0.010340743698179722, + "step": 1710 + }, + { + "epoch": 0.9011132940406025, + "grad_norm": 0.25776105345582007, + "learning_rate": 2.0503422624285079e-07, + "log_odds_chosen": 0.11539041996002197, + "log_odds_ratio": -0.7499845623970032, + "logits/chosen": -0.2296096533536911, + "logits/rejected": -0.22232845425605774, + "logps/chosen": -0.9569549560546875, + "logps/rejected": -1.0448832511901855, + "loss": 0.7511, + "nll_loss": 0.7580893635749817, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.00956954900175333, + "rewards/margins": 0.0008792821317911148, + "rewards/rejected": -0.01044883020222187, + "step": 1720 + }, + { + "epoch": 0.9063523248199084, + "grad_norm": 0.2706279093106233, + "learning_rate": 1.8399030943168143e-07, + "log_odds_chosen": 0.3941977620124817, + "log_odds_ratio": -0.6337345242500305, + "logits/chosen": -0.24551761150360107, + "logits/rejected": -0.2374114990234375, + "logps/chosen": -0.9035981297492981, + "logps/rejected": -1.148105502128601, + "loss": 0.6806, + "nll_loss": 0.66960209608078, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.009035980328917503, + "rewards/margins": 0.002445075660943985, + "rewards/rejected": -0.011481055058538914, + "step": 1730 + }, + { + "epoch": 0.9115913555992141, + "grad_norm": 0.3609652737051337, + "learning_rate": 1.6405649447966974e-07, + "log_odds_chosen": 0.3195926547050476, + "log_odds_ratio": -0.6484790444374084, + "logits/chosen": -0.2420916110277176, + "logits/rejected": -0.2452610284090042, + "logps/chosen": -0.8236813545227051, + "logps/rejected": -1.0253655910491943, + "loss": 0.6678, + "nll_loss": 0.6674818396568298, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.008236813358962536, + "rewards/margins": 0.002016842132434249, + "rewards/rejected": -0.010253656655550003, + "step": 1740 + }, + { + "epoch": 0.91683038637852, + "grad_norm": 0.31053252175434054, + "learning_rate": 1.4523945463783188e-07, + "log_odds_chosen": 0.39518290758132935, + "log_odds_ratio": -0.6608596444129944, + "logits/chosen": -0.24014584720134735, + "logits/rejected": -0.2523557245731354, + "logps/chosen": -0.8841649293899536, + "logps/rejected": -1.1480939388275146, + "loss": 0.715, + "nll_loss": 0.7150102853775024, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.008841649629175663, + "rewards/margins": 0.00263928878121078, + "rewards/rejected": -0.011480937711894512, + "step": 1750 + }, + { + "epoch": 0.9220694171578258, + "grad_norm": 0.2695837646740879, + "learning_rate": 1.2754548929394504e-07, + "log_odds_chosen": 0.30153781175613403, + "log_odds_ratio": -0.6760331988334656, + "logits/chosen": -0.24449577927589417, + "logits/rejected": -0.26775243878364563, + "logps/chosen": -0.8484500646591187, + "logps/rejected": -1.0482269525527954, + "loss": 0.6887, + "nll_loss": 0.7010708451271057, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.008484501391649246, + "rewards/margins": 0.0019977684132754803, + "rewards/rejected": -0.01048226933926344, + "step": 1760 + }, + { + "epoch": 0.9273084479371316, + "grad_norm": 0.3245310485170031, + "learning_rate": 1.1098052186369816e-07, + "log_odds_chosen": 0.22715063393115997, + "log_odds_ratio": -0.709073543548584, + "logits/chosen": -0.2291758507490158, + "logits/rejected": -0.24192312359809875, + "logps/chosen": -0.9002648591995239, + "logps/rejected": -1.0433080196380615, + "loss": 0.6921, + "nll_loss": 0.6624379754066467, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00900264736264944, + "rewards/margins": 0.0014304317301139235, + "rewards/rejected": -0.010433079674839973, + "step": 1770 + }, + { + "epoch": 0.9325474787164375, + "grad_norm": 0.3213633363643417, + "learning_rate": 9.555009780770584e-08, + "log_odds_chosen": 0.29082897305488586, + "log_odds_ratio": -0.6803869605064392, + "logits/chosen": -0.2208433598279953, + "logits/rejected": -0.25635868310928345, + "logps/chosen": -0.8286264538764954, + "logps/rejected": -1.0254353284835815, + "loss": 0.679, + "nll_loss": 0.6641760468482971, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.008286263793706894, + "rewards/margins": 0.0019680894911289215, + "rewards/rejected": -0.01025435421615839, + "step": 1780 + }, + { + "epoch": 0.9377865094957433, + "grad_norm": 0.306877632695617, + "learning_rate": 8.125938277505645e-08, + "log_odds_chosen": 0.31603384017944336, + "log_odds_ratio": -0.6547614336013794, + "logits/chosen": -0.2147616595029831, + "logits/rejected": -0.22782523930072784, + "logps/chosen": -0.8640085458755493, + "logps/rejected": -1.0608965158462524, + "loss": 0.6808, + "nll_loss": 0.6702268719673157, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.008640085346996784, + "rewards/margins": 0.0019688797183334827, + "rewards/rejected": -0.01060896459966898, + "step": 1790 + }, + { + "epoch": 0.9430255402750491, + "grad_norm": 0.20245915700115008, + "learning_rate": 6.81131608740026e-08, + "log_odds_chosen": 0.22377637028694153, + "log_odds_ratio": -0.7018457651138306, + "logits/chosen": -0.19310477375984192, + "logits/rejected": -0.2190311849117279, + "logps/chosen": -0.8574590682983398, + "logps/rejected": -0.9911470413208008, + "loss": 0.7038, + "nll_loss": 0.6608118414878845, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.008574591018259525, + "rewards/margins": 0.0013368797954171896, + "rewards/rejected": -0.009911470115184784, + "step": 1800 + }, + { + "epoch": 0.9430255402750491, + "eval_log_odds_chosen": 0.3163548707962036, + "eval_log_odds_ratio": -0.661532461643219, + "eval_logits/chosen": -0.23114821314811707, + "eval_logits/rejected": -0.23599176108837128, + "eval_logps/chosen": -0.8470891118049622, + "eval_logps/rejected": -1.049268364906311, + "eval_loss": 0.5734038949012756, + "eval_nll_loss": 0.5669639706611633, + "eval_rewards/accuracies": 0.609000027179718, + "eval_rewards/chosen": -0.008470890112221241, + "eval_rewards/margins": 0.00202179211191833, + "eval_rewards/rejected": -0.010492682456970215, + "eval_runtime": 269.8606, + "eval_samples_per_second": 7.408, + "eval_steps_per_second": 0.463, + "step": 1800 + }, + { + "epoch": 0.9482645710543549, + "grad_norm": 0.3628155654733571, + "learning_rate": 5.611583307038381e-08, + "log_odds_chosen": 0.24962477385997772, + "log_odds_ratio": -0.6831658482551575, + "logits/chosen": -0.23160485923290253, + "logits/rejected": -0.2222505509853363, + "logps/chosen": -0.8988336324691772, + "logps/rejected": -1.0403974056243896, + "loss": 0.7116, + "nll_loss": 0.6903770565986633, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.008988335728645325, + "rewards/margins": 0.0014156366232782602, + "rewards/rejected": -0.010403972119092941, + "step": 1810 + }, + { + "epoch": 0.9535036018336608, + "grad_norm": 0.2483018209845032, + "learning_rate": 4.527141571431498e-08, + "log_odds_chosen": 0.22445717453956604, + "log_odds_ratio": -0.6999959945678711, + "logits/chosen": -0.23954102396965027, + "logits/rejected": -0.24183711409568787, + "logps/chosen": -0.8445944786071777, + "logps/rejected": -0.9891592860221863, + "loss": 0.6892, + "nll_loss": 0.7019317746162415, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.00844594370573759, + "rewards/margins": 0.0014456479111686349, + "rewards/rejected": -0.009891592897474766, + "step": 1820 + }, + { + "epoch": 0.9587426326129665, + "grad_norm": 0.2131281074522212, + "learning_rate": 3.5583539195629285e-08, + "log_odds_chosen": 0.43377724289894104, + "log_odds_ratio": -0.6147680282592773, + "logits/chosen": -0.2520751655101776, + "logits/rejected": -0.2712712287902832, + "logps/chosen": -0.8412583470344543, + "logps/rejected": -1.1093076467514038, + "loss": 0.7113, + "nll_loss": 0.7085931897163391, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.00841258279979229, + "rewards/margins": 0.00268049375154078, + "rewards/rejected": -0.011093077249825, + "step": 1830 + }, + { + "epoch": 0.9639816633922724, + "grad_norm": 0.4078068154194888, + "learning_rate": 2.7055446728532382e-08, + "log_odds_chosen": 0.23464258015155792, + "log_odds_ratio": -0.707868218421936, + "logits/chosen": -0.25544843077659607, + "logits/rejected": -0.27141958475112915, + "logps/chosen": -0.8739751577377319, + "logps/rejected": -1.0185130834579468, + "loss": 0.7277, + "nll_loss": 0.7003148198127747, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.008739751763641834, + "rewards/margins": 0.0014453793410211802, + "rewards/rejected": -0.010185131803154945, + "step": 1840 + }, + { + "epoch": 0.9692206941715783, + "grad_norm": 0.2412577505445294, + "learning_rate": 1.9689993265870176e-08, + "log_odds_chosen": 0.23277541995048523, + "log_odds_ratio": -0.6795376539230347, + "logits/chosen": -0.2662840187549591, + "logits/rejected": -0.2727211117744446, + "logps/chosen": -0.895865261554718, + "logps/rejected": -1.037330150604248, + "loss": 0.6942, + "nll_loss": 0.7306576371192932, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.00895865261554718, + "rewards/margins": 0.0014146488392725587, + "rewards/rejected": -0.010373301804065704, + "step": 1850 + }, + { + "epoch": 0.9744597249508841, + "grad_norm": 0.3033721699704284, + "learning_rate": 1.3489644543374479e-08, + "log_odds_chosen": 0.29598861932754517, + "log_odds_ratio": -0.6771318912506104, + "logits/chosen": -0.21042628586292267, + "logits/rejected": -0.23291948437690735, + "logps/chosen": -0.8353894948959351, + "logps/rejected": -0.9999237060546875, + "loss": 0.6644, + "nll_loss": 0.6520312428474426, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.00835389643907547, + "rewards/margins": 0.001645339885726571, + "rewards/rejected": -0.009999236091971397, + "step": 1860 + }, + { + "epoch": 0.9796987557301899, + "grad_norm": 0.27851831050728887, + "learning_rate": 8.456476254209367e-09, + "log_odds_chosen": 0.25299039483070374, + "log_odds_ratio": -0.6810643076896667, + "logits/chosen": -0.20314674079418182, + "logits/rejected": -0.22378787398338318, + "logps/chosen": -0.8736406564712524, + "logps/rejected": -1.0240364074707031, + "loss": 0.7072, + "nll_loss": 0.6920244693756104, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.008736406452953815, + "rewards/margins": 0.0015039570862427354, + "rewards/rejected": -0.010240363888442516, + "step": 1870 + }, + { + "epoch": 0.9849377865094957, + "grad_norm": 0.24734569987361493, + "learning_rate": 4.592173354088291e-09, + "log_odds_chosen": 0.3224944472312927, + "log_odds_ratio": -0.678228497505188, + "logits/chosen": -0.2332799881696701, + "logits/rejected": -0.2256723940372467, + "logps/chosen": -0.8689178228378296, + "logps/rejected": -1.074220061302185, + "loss": 0.6902, + "nll_loss": 0.6795616745948792, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.00868917815387249, + "rewards/margins": 0.002053022850304842, + "rewards/rejected": -0.010742200538516045, + "step": 1880 + }, + { + "epoch": 0.9901768172888016, + "grad_norm": 0.2370498550298322, + "learning_rate": 1.8980294972025245e-09, + "log_odds_chosen": 0.29929059743881226, + "log_odds_ratio": -0.6822630763053894, + "logits/chosen": -0.21594195067882538, + "logits/rejected": -0.24053767323493958, + "logps/chosen": -0.8797448873519897, + "logps/rejected": -1.052920937538147, + "loss": 0.7147, + "nll_loss": 0.7171341180801392, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.008797448128461838, + "rewards/margins": 0.0017317605670541525, + "rewards/rejected": -0.010529209859669209, + "step": 1890 + }, + { + "epoch": 0.9954158480681073, + "grad_norm": 0.2673964952852329, + "learning_rate": 3.749466031427451e-10, + "log_odds_chosen": 0.35855141282081604, + "log_odds_ratio": -0.6542550325393677, + "logits/chosen": -0.20227336883544922, + "logits/rejected": -0.20370423793792725, + "logps/chosen": -0.8851898312568665, + "logps/rejected": -1.0999327898025513, + "loss": 0.6723, + "nll_loss": 0.662273108959198, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.008851898834109306, + "rewards/margins": 0.002147428225725889, + "rewards/rejected": -0.010999326594173908, + "step": 1900 + }, + { + "epoch": 0.9954158480681073, + "eval_log_odds_chosen": 0.316756933927536, + "eval_log_odds_ratio": -0.6615029573440552, + "eval_logits/chosen": -0.23195622861385345, + "eval_logits/rejected": -0.2369166910648346, + "eval_logps/chosen": -0.847005307674408, + "eval_logps/rejected": -1.0492846965789795, + "eval_loss": 0.5733689069747925, + "eval_nll_loss": 0.5669326186180115, + "eval_rewards/accuracies": 0.6069999933242798, + "eval_rewards/chosen": -0.008470052853226662, + "eval_rewards/margins": 0.002022792585194111, + "eval_rewards/rejected": -0.010492845438420773, + "eval_runtime": 280.6123, + "eval_samples_per_second": 7.124, + "eval_steps_per_second": 0.445, + "step": 1900 + }, + { + "epoch": 0.999607072691552, + "step": 1908, + "total_flos": 0.0, + "train_loss": 0.9266155345884759, + "train_runtime": 40145.686, + "train_samples_per_second": 1.521, + "train_steps_per_second": 0.048 + } + ], + "logging_steps": 10, + "max_steps": 1908, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 100, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}