| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 805, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 6.172839506172839e-09, | |
| "logits/chosen": -2.8421168327331543, | |
| "logits/rejected": -2.6747336387634277, | |
| "logps/chosen": -92.33953094482422, | |
| "logps/rejected": -44.262760162353516, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 6.172839506172839e-08, | |
| "logits/chosen": -2.9818191528320312, | |
| "logits/rejected": -2.9740567207336426, | |
| "logps/chosen": -197.3586883544922, | |
| "logps/rejected": -149.28749084472656, | |
| "loss": 0.6901, | |
| "rewards/accuracies": 0.3611111044883728, | |
| "rewards/chosen": -0.005469343159347773, | |
| "rewards/margins": 0.00850688572973013, | |
| "rewards/rejected": -0.01397622935473919, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 1.2345679012345677e-07, | |
| "logits/chosen": -2.9317967891693115, | |
| "logits/rejected": -2.8763492107391357, | |
| "logps/chosen": -172.7858428955078, | |
| "logps/rejected": -133.58245849609375, | |
| "loss": 0.6353, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.1074294000864029, | |
| "rewards/margins": 0.16630074381828308, | |
| "rewards/rejected": -0.05887135863304138, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 1.8518518518518516e-07, | |
| "logits/chosen": -3.0448741912841797, | |
| "logits/rejected": -2.999143362045288, | |
| "logps/chosen": -179.38113403320312, | |
| "logps/rejected": -146.7749786376953, | |
| "loss": 0.5047, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": 0.08539465069770813, | |
| "rewards/margins": 0.540686845779419, | |
| "rewards/rejected": -0.4552922248840332, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 2.4691358024691354e-07, | |
| "logits/chosen": -2.9631247520446777, | |
| "logits/rejected": -2.9552507400512695, | |
| "logps/chosen": -174.05288696289062, | |
| "logps/rejected": -143.798583984375, | |
| "loss": 0.3928, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.11930576711893082, | |
| "rewards/margins": 0.9224799871444702, | |
| "rewards/rejected": -0.8031741976737976, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 3.086419753086419e-07, | |
| "logits/chosen": -2.955972194671631, | |
| "logits/rejected": -2.880552053451538, | |
| "logps/chosen": -183.1387481689453, | |
| "logps/rejected": -157.20669555664062, | |
| "loss": 0.2956, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.8193867802619934, | |
| "rewards/margins": 1.485733985900879, | |
| "rewards/rejected": -2.3051209449768066, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 3.703703703703703e-07, | |
| "logits/chosen": -2.960036516189575, | |
| "logits/rejected": -2.8513035774230957, | |
| "logps/chosen": -154.39926147460938, | |
| "logps/rejected": -133.40078735351562, | |
| "loss": 0.2919, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.9133367538452148, | |
| "rewards/margins": 1.8072048425674438, | |
| "rewards/rejected": -2.720541477203369, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 4.320987654320987e-07, | |
| "logits/chosen": -2.8771309852600098, | |
| "logits/rejected": -2.797616481781006, | |
| "logps/chosen": -165.77328491210938, | |
| "logps/rejected": -142.69815063476562, | |
| "loss": 0.2689, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.7793342471122742, | |
| "rewards/margins": 2.3448145389556885, | |
| "rewards/rejected": -3.1241488456726074, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.938271604938271e-07, | |
| "logits/chosen": -2.7670953273773193, | |
| "logits/rejected": -2.723829984664917, | |
| "logps/chosen": -185.12596130371094, | |
| "logps/rejected": -174.61465454101562, | |
| "loss": 0.2633, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.7798545360565186, | |
| "rewards/margins": 2.6085076332092285, | |
| "rewards/rejected": -4.388362884521484, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 4.937845303867402e-07, | |
| "logits/chosen": -2.933378219604492, | |
| "logits/rejected": -2.9017395973205566, | |
| "logps/chosen": -183.1439208984375, | |
| "logps/rejected": -171.66964721679688, | |
| "loss": 0.2236, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -1.4862909317016602, | |
| "rewards/margins": 2.482652187347412, | |
| "rewards/rejected": -3.968942642211914, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.868784530386741e-07, | |
| "logits/chosen": -2.7947914600372314, | |
| "logits/rejected": -2.823068618774414, | |
| "logps/chosen": -189.07904052734375, | |
| "logps/rejected": -181.06781005859375, | |
| "loss": 0.2324, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.5773736238479614, | |
| "rewards/margins": 3.1609904766082764, | |
| "rewards/rejected": -3.738363742828369, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 4.799723756906077e-07, | |
| "logits/chosen": -2.972888946533203, | |
| "logits/rejected": -2.856055498123169, | |
| "logps/chosen": -194.32847595214844, | |
| "logps/rejected": -180.61575317382812, | |
| "loss": 0.2595, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.5956058502197266, | |
| "rewards/margins": 2.7098982334136963, | |
| "rewards/rejected": -4.305504322052002, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 4.730662983425414e-07, | |
| "logits/chosen": -2.8165183067321777, | |
| "logits/rejected": -2.7924342155456543, | |
| "logps/chosen": -171.48434448242188, | |
| "logps/rejected": -174.36788940429688, | |
| "loss": 0.2177, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -1.069062352180481, | |
| "rewards/margins": 3.2766518592834473, | |
| "rewards/rejected": -4.345714092254639, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.661602209944751e-07, | |
| "logits/chosen": -2.941241502761841, | |
| "logits/rejected": -2.8772006034851074, | |
| "logps/chosen": -206.2366180419922, | |
| "logps/rejected": -196.18931579589844, | |
| "loss": 0.1857, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -1.6614869832992554, | |
| "rewards/margins": 3.376704454421997, | |
| "rewards/rejected": -5.038191318511963, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 4.592541436464088e-07, | |
| "logits/chosen": -2.7895777225494385, | |
| "logits/rejected": -2.7228431701660156, | |
| "logps/chosen": -185.4031524658203, | |
| "logps/rejected": -193.98040771484375, | |
| "loss": 0.174, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -0.8083890080451965, | |
| "rewards/margins": 4.341578960418701, | |
| "rewards/rejected": -5.149968147277832, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.5234806629834255e-07, | |
| "logits/chosen": -2.73811674118042, | |
| "logits/rejected": -2.7120604515075684, | |
| "logps/chosen": -214.88565063476562, | |
| "logps/rejected": -221.6265411376953, | |
| "loss": 0.185, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -2.5129263401031494, | |
| "rewards/margins": 3.6045918464660645, | |
| "rewards/rejected": -6.117517948150635, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 4.454419889502762e-07, | |
| "logits/chosen": -2.9045376777648926, | |
| "logits/rejected": -2.8451740741729736, | |
| "logps/chosen": -202.41903686523438, | |
| "logps/rejected": -205.6617889404297, | |
| "loss": 0.208, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.077017307281494, | |
| "rewards/margins": 4.358880996704102, | |
| "rewards/rejected": -6.4358978271484375, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 4.3853591160220993e-07, | |
| "logits/chosen": -2.8927552700042725, | |
| "logits/rejected": -2.7776336669921875, | |
| "logps/chosen": -203.76724243164062, | |
| "logps/rejected": -200.79864501953125, | |
| "loss": 0.1703, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.2595643997192383, | |
| "rewards/margins": 3.9248855113983154, | |
| "rewards/rejected": -6.184449672698975, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.3162983425414365e-07, | |
| "logits/chosen": -2.7550511360168457, | |
| "logits/rejected": -2.7225847244262695, | |
| "logps/chosen": -225.6936798095703, | |
| "logps/rejected": -231.35400390625, | |
| "loss": 0.1733, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.379578113555908, | |
| "rewards/margins": 3.530996322631836, | |
| "rewards/rejected": -6.910574436187744, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.247237569060773e-07, | |
| "logits/chosen": -2.8363826274871826, | |
| "logits/rejected": -2.7442939281463623, | |
| "logps/chosen": -212.888427734375, | |
| "logps/rejected": -198.71542358398438, | |
| "loss": 0.218, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -2.968935251235962, | |
| "rewards/margins": 3.221897602081299, | |
| "rewards/rejected": -6.19083309173584, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 4.1781767955801103e-07, | |
| "logits/chosen": -2.8280460834503174, | |
| "logits/rejected": -2.7499425411224365, | |
| "logps/chosen": -182.047607421875, | |
| "logps/rejected": -188.87193298339844, | |
| "loss": 0.1708, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.369425058364868, | |
| "rewards/margins": 4.022088050842285, | |
| "rewards/rejected": -6.391513824462891, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.1091160220994475e-07, | |
| "logits/chosen": -2.7316782474517822, | |
| "logits/rejected": -2.6802334785461426, | |
| "logps/chosen": -212.96212768554688, | |
| "logps/rejected": -203.3765106201172, | |
| "loss": 0.1689, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -3.15626859664917, | |
| "rewards/margins": 3.610233783721924, | |
| "rewards/rejected": -6.766502380371094, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 4.0400552486187846e-07, | |
| "logits/chosen": -2.8573451042175293, | |
| "logits/rejected": -2.751213550567627, | |
| "logps/chosen": -225.369140625, | |
| "logps/rejected": -213.1463623046875, | |
| "loss": 0.2091, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -3.183094024658203, | |
| "rewards/margins": 3.5506489276885986, | |
| "rewards/rejected": -6.733743190765381, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 3.970994475138121e-07, | |
| "logits/chosen": -2.7918601036071777, | |
| "logits/rejected": -2.731823444366455, | |
| "logps/chosen": -194.12893676757812, | |
| "logps/rejected": -183.81556701660156, | |
| "loss": 0.1979, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -2.6381051540374756, | |
| "rewards/margins": 2.9515299797058105, | |
| "rewards/rejected": -5.589634895324707, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 3.9019337016574584e-07, | |
| "logits/chosen": -2.699711561203003, | |
| "logits/rejected": -2.6905601024627686, | |
| "logps/chosen": -212.59793090820312, | |
| "logps/rejected": -232.6545867919922, | |
| "loss": 0.1849, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -3.5139076709747314, | |
| "rewards/margins": 4.449800491333008, | |
| "rewards/rejected": -7.96370792388916, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.832872928176795e-07, | |
| "logits/chosen": -2.721381425857544, | |
| "logits/rejected": -2.7046854496002197, | |
| "logps/chosen": -200.67886352539062, | |
| "logps/rejected": -220.4629669189453, | |
| "loss": 0.1447, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -3.1384024620056152, | |
| "rewards/margins": 4.899154186248779, | |
| "rewards/rejected": -8.037556648254395, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 3.763812154696133e-07, | |
| "logits/chosen": -2.8270747661590576, | |
| "logits/rejected": -2.741473913192749, | |
| "logps/chosen": -208.8101043701172, | |
| "logps/rejected": -218.16537475585938, | |
| "loss": 0.1567, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -3.2333245277404785, | |
| "rewards/margins": 4.820733547210693, | |
| "rewards/rejected": -8.054059028625488, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 3.6947513812154694e-07, | |
| "logits/chosen": -2.8693325519561768, | |
| "logits/rejected": -2.7638630867004395, | |
| "logps/chosen": -211.0121612548828, | |
| "logps/rejected": -217.35910034179688, | |
| "loss": 0.1731, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -3.540541410446167, | |
| "rewards/margins": 4.472687721252441, | |
| "rewards/rejected": -8.013228416442871, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 3.6256906077348066e-07, | |
| "logits/chosen": -2.7264175415039062, | |
| "logits/rejected": -2.7106542587280273, | |
| "logps/chosen": -221.5767822265625, | |
| "logps/rejected": -234.4220733642578, | |
| "loss": 0.2261, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -3.8089592456817627, | |
| "rewards/margins": 4.285206317901611, | |
| "rewards/rejected": -8.094165802001953, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 3.556629834254143e-07, | |
| "logits/chosen": -2.709073305130005, | |
| "logits/rejected": -2.650305986404419, | |
| "logps/chosen": -183.47171020507812, | |
| "logps/rejected": -189.16053771972656, | |
| "loss": 0.1949, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.39518666267395, | |
| "rewards/margins": 3.255284070968628, | |
| "rewards/rejected": -6.6504716873168945, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 3.4875690607734804e-07, | |
| "logits/chosen": -2.742584228515625, | |
| "logits/rejected": -2.634887456893921, | |
| "logps/chosen": -212.4958953857422, | |
| "logps/rejected": -214.77978515625, | |
| "loss": 0.1577, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -2.987830400466919, | |
| "rewards/margins": 4.540104389190674, | |
| "rewards/rejected": -7.527935028076172, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 3.418508287292817e-07, | |
| "logits/chosen": -2.803213119506836, | |
| "logits/rejected": -2.705699920654297, | |
| "logps/chosen": -201.23739624023438, | |
| "logps/rejected": -210.5272979736328, | |
| "loss": 0.1819, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -2.2409796714782715, | |
| "rewards/margins": 4.629876136779785, | |
| "rewards/rejected": -6.870855808258057, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 3.3494475138121547e-07, | |
| "logits/chosen": -2.821776866912842, | |
| "logits/rejected": -2.6838371753692627, | |
| "logps/chosen": -205.13998413085938, | |
| "logps/rejected": -210.2687225341797, | |
| "loss": 0.1284, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -1.8670909404754639, | |
| "rewards/margins": 5.2015180587768555, | |
| "rewards/rejected": -7.068609714508057, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 3.280386740331492e-07, | |
| "logits/chosen": -2.717777729034424, | |
| "logits/rejected": -2.6688733100891113, | |
| "logps/chosen": -181.1573028564453, | |
| "logps/rejected": -195.77450561523438, | |
| "loss": 0.2068, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.757702589035034, | |
| "rewards/margins": 4.233901023864746, | |
| "rewards/rejected": -6.991603851318359, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.2113259668508285e-07, | |
| "logits/chosen": -2.834904193878174, | |
| "logits/rejected": -2.751018524169922, | |
| "logps/chosen": -236.5355224609375, | |
| "logps/rejected": -249.8627166748047, | |
| "loss": 0.1873, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -4.415268898010254, | |
| "rewards/margins": 4.573099136352539, | |
| "rewards/rejected": -8.988368034362793, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 3.1422651933701657e-07, | |
| "logits/chosen": -2.702388048171997, | |
| "logits/rejected": -2.5783984661102295, | |
| "logps/chosen": -205.3250274658203, | |
| "logps/rejected": -226.51025390625, | |
| "loss": 0.1513, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.3364059925079346, | |
| "rewards/margins": 5.607662200927734, | |
| "rewards/rejected": -8.94406795501709, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.0732044198895023e-07, | |
| "logits/chosen": -2.719581127166748, | |
| "logits/rejected": -2.6670548915863037, | |
| "logps/chosen": -215.56069946289062, | |
| "logps/rejected": -238.80691528320312, | |
| "loss": 0.1319, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -3.7043700218200684, | |
| "rewards/margins": 5.375087261199951, | |
| "rewards/rejected": -9.07945728302002, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 3.00414364640884e-07, | |
| "logits/chosen": -2.687743902206421, | |
| "logits/rejected": -2.534635305404663, | |
| "logps/chosen": -197.8555145263672, | |
| "logps/rejected": -212.0418243408203, | |
| "loss": 0.1153, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -2.731755495071411, | |
| "rewards/margins": 6.006522178649902, | |
| "rewards/rejected": -8.738277435302734, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 2.9350828729281767e-07, | |
| "logits/chosen": -2.8031535148620605, | |
| "logits/rejected": -2.7039589881896973, | |
| "logps/chosen": -196.32485961914062, | |
| "logps/rejected": -220.0038604736328, | |
| "loss": 0.1482, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -3.087531805038452, | |
| "rewards/margins": 5.792913436889648, | |
| "rewards/rejected": -8.88044548034668, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 2.866022099447514e-07, | |
| "logits/chosen": -2.762528657913208, | |
| "logits/rejected": -2.7139079570770264, | |
| "logps/chosen": -245.5567169189453, | |
| "logps/rejected": -268.78729248046875, | |
| "loss": 0.1335, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -3.6741371154785156, | |
| "rewards/margins": 6.558190822601318, | |
| "rewards/rejected": -10.232328414916992, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 2.7969613259668505e-07, | |
| "logits/chosen": -2.8232762813568115, | |
| "logits/rejected": -2.6880855560302734, | |
| "logps/chosen": -210.9452362060547, | |
| "logps/rejected": -228.576416015625, | |
| "loss": 0.1482, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.587395191192627, | |
| "rewards/margins": 6.285602569580078, | |
| "rewards/rejected": -9.87299919128418, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 2.7279005524861877e-07, | |
| "logits/chosen": -2.968181610107422, | |
| "logits/rejected": -2.783268690109253, | |
| "logps/chosen": -250.2484893798828, | |
| "logps/rejected": -248.7700653076172, | |
| "loss": 0.16, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -4.302699565887451, | |
| "rewards/margins": 5.751161098480225, | |
| "rewards/rejected": -10.053861618041992, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 2.6588397790055243e-07, | |
| "logits/chosen": -2.843967914581299, | |
| "logits/rejected": -2.7779390811920166, | |
| "logps/chosen": -220.41262817382812, | |
| "logps/rejected": -247.0951690673828, | |
| "loss": 0.131, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -3.6894760131835938, | |
| "rewards/margins": 5.2908501625061035, | |
| "rewards/rejected": -8.980325698852539, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 2.589779005524862e-07, | |
| "logits/chosen": -2.895799398422241, | |
| "logits/rejected": -2.795949935913086, | |
| "logps/chosen": -243.14785766601562, | |
| "logps/rejected": -262.537109375, | |
| "loss": 0.1558, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.353732109069824, | |
| "rewards/margins": 6.2553300857543945, | |
| "rewards/rejected": -10.609061241149902, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 2.5207182320441986e-07, | |
| "logits/chosen": -2.8478636741638184, | |
| "logits/rejected": -2.782703399658203, | |
| "logps/chosen": -251.97689819335938, | |
| "logps/rejected": -279.1159362792969, | |
| "loss": 0.1489, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -4.981522083282471, | |
| "rewards/margins": 6.379393100738525, | |
| "rewards/rejected": -11.360913276672363, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 2.451657458563536e-07, | |
| "logits/chosen": -2.9137637615203857, | |
| "logits/rejected": -2.846818447113037, | |
| "logps/chosen": -231.48171997070312, | |
| "logps/rejected": -252.91494750976562, | |
| "loss": 0.1211, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.0656819343566895, | |
| "rewards/margins": 6.312979221343994, | |
| "rewards/rejected": -10.378661155700684, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 2.3825966850828727e-07, | |
| "logits/chosen": -2.820435047149658, | |
| "logits/rejected": -2.7222158908843994, | |
| "logps/chosen": -176.95230102539062, | |
| "logps/rejected": -202.005859375, | |
| "loss": 0.1487, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -1.8066993951797485, | |
| "rewards/margins": 6.806242942810059, | |
| "rewards/rejected": -8.612942695617676, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 2.31353591160221e-07, | |
| "logits/chosen": -2.861485004425049, | |
| "logits/rejected": -2.7676100730895996, | |
| "logps/chosen": -223.93276977539062, | |
| "logps/rejected": -244.85665893554688, | |
| "loss": 0.1293, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -3.4348251819610596, | |
| "rewards/margins": 6.165853023529053, | |
| "rewards/rejected": -9.600679397583008, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 2.2444751381215468e-07, | |
| "logits/chosen": -2.933964967727661, | |
| "logits/rejected": -2.788942337036133, | |
| "logps/chosen": -226.66796875, | |
| "logps/rejected": -244.5904998779297, | |
| "loss": 0.1224, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.5399329662323, | |
| "rewards/margins": 5.981089115142822, | |
| "rewards/rejected": -9.521021842956543, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 2.175414364640884e-07, | |
| "logits/chosen": -2.798732280731201, | |
| "logits/rejected": -2.7525930404663086, | |
| "logps/chosen": -224.1324462890625, | |
| "logps/rejected": -260.4334411621094, | |
| "loss": 0.1676, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.89032244682312, | |
| "rewards/margins": 7.569252967834473, | |
| "rewards/rejected": -11.459574699401855, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 2.1063535911602208e-07, | |
| "logits/chosen": -2.837965726852417, | |
| "logits/rejected": -2.7505483627319336, | |
| "logps/chosen": -236.68588256835938, | |
| "logps/rejected": -258.0523681640625, | |
| "loss": 0.1537, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -5.027956008911133, | |
| "rewards/margins": 6.000524997711182, | |
| "rewards/rejected": -11.028480529785156, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_logits/chosen": -2.780914545059204, | |
| "eval_logits/rejected": -2.70768666267395, | |
| "eval_logps/chosen": -219.7342071533203, | |
| "eval_logps/rejected": -246.59507751464844, | |
| "eval_loss": 0.1479674130678177, | |
| "eval_rewards/accuracies": 0.8563829660415649, | |
| "eval_rewards/chosen": -3.757824182510376, | |
| "eval_rewards/margins": 6.220169544219971, | |
| "eval_rewards/rejected": -9.97799301147461, | |
| "eval_runtime": 240.8343, | |
| "eval_samples_per_second": 6.208, | |
| "eval_steps_per_second": 0.195, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 2.0372928176795578e-07, | |
| "logits/chosen": -2.8163836002349854, | |
| "logits/rejected": -2.7235684394836426, | |
| "logps/chosen": -206.2197265625, | |
| "logps/rejected": -240.59561157226562, | |
| "loss": 0.1149, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -3.1149652004241943, | |
| "rewards/margins": 7.466650485992432, | |
| "rewards/rejected": -10.58161449432373, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.968232044198895e-07, | |
| "logits/chosen": -2.895784854888916, | |
| "logits/rejected": -2.8176522254943848, | |
| "logps/chosen": -211.858642578125, | |
| "logps/rejected": -227.81741333007812, | |
| "loss": 0.1775, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -4.783520698547363, | |
| "rewards/margins": 4.954631805419922, | |
| "rewards/rejected": -9.738151550292969, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 1.899171270718232e-07, | |
| "logits/chosen": -2.9654347896575928, | |
| "logits/rejected": -2.8510003089904785, | |
| "logps/chosen": -215.161376953125, | |
| "logps/rejected": -225.93490600585938, | |
| "loss": 0.1701, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -3.928018093109131, | |
| "rewards/margins": 5.275222301483154, | |
| "rewards/rejected": -9.203241348266602, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 1.830110497237569e-07, | |
| "logits/chosen": -2.834559202194214, | |
| "logits/rejected": -2.7768056392669678, | |
| "logps/chosen": -248.0672149658203, | |
| "logps/rejected": -277.980224609375, | |
| "loss": 0.1456, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -4.741249084472656, | |
| "rewards/margins": 5.934549331665039, | |
| "rewards/rejected": -10.675798416137695, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7610497237569062e-07, | |
| "logits/chosen": -2.878166913986206, | |
| "logits/rejected": -2.8250679969787598, | |
| "logps/chosen": -214.7487030029297, | |
| "logps/rejected": -249.8466339111328, | |
| "loss": 0.1641, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -3.848902940750122, | |
| "rewards/margins": 6.1274847984313965, | |
| "rewards/rejected": -9.976387023925781, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 1.691988950276243e-07, | |
| "logits/chosen": -2.9045345783233643, | |
| "logits/rejected": -2.85447096824646, | |
| "logps/chosen": -237.44955444335938, | |
| "logps/rejected": -270.6697998046875, | |
| "loss": 0.1439, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.506590843200684, | |
| "rewards/margins": 6.747040748596191, | |
| "rewards/rejected": -11.253631591796875, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.62292817679558e-07, | |
| "logits/chosen": -2.8267338275909424, | |
| "logits/rejected": -2.673027515411377, | |
| "logps/chosen": -219.37026977539062, | |
| "logps/rejected": -235.9922332763672, | |
| "loss": 0.1133, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.315529823303223, | |
| "rewards/margins": 6.053648471832275, | |
| "rewards/rejected": -10.369178771972656, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.5538674033149171e-07, | |
| "logits/chosen": -2.883451223373413, | |
| "logits/rejected": -2.7439939975738525, | |
| "logps/chosen": -228.4202117919922, | |
| "logps/rejected": -242.2844696044922, | |
| "loss": 0.1627, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -4.730469226837158, | |
| "rewards/margins": 6.399707317352295, | |
| "rewards/rejected": -11.130178451538086, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.484806629834254e-07, | |
| "logits/chosen": -2.9942538738250732, | |
| "logits/rejected": -2.868765115737915, | |
| "logps/chosen": -260.10406494140625, | |
| "logps/rejected": -253.8542938232422, | |
| "loss": 0.1594, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -5.60601282119751, | |
| "rewards/margins": 4.876471519470215, | |
| "rewards/rejected": -10.482483863830566, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.4157458563535912e-07, | |
| "logits/chosen": -2.9481163024902344, | |
| "logits/rejected": -2.8333523273468018, | |
| "logps/chosen": -226.2001953125, | |
| "logps/rejected": -235.05014038085938, | |
| "loss": 0.1784, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.186473846435547, | |
| "rewards/margins": 4.799227714538574, | |
| "rewards/rejected": -9.985700607299805, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.346685082872928e-07, | |
| "logits/chosen": -2.820510149002075, | |
| "logits/rejected": -2.734377384185791, | |
| "logps/chosen": -261.7950134277344, | |
| "logps/rejected": -286.0494689941406, | |
| "loss": 0.104, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.037507057189941, | |
| "rewards/margins": 6.992823600769043, | |
| "rewards/rejected": -12.030329704284668, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.277624309392265e-07, | |
| "logits/chosen": -2.909327507019043, | |
| "logits/rejected": -2.750497817993164, | |
| "logps/chosen": -224.2764129638672, | |
| "logps/rejected": -231.7129364013672, | |
| "loss": 0.1504, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -5.132498741149902, | |
| "rewards/margins": 5.400217056274414, | |
| "rewards/rejected": -10.532715797424316, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.2085635359116022e-07, | |
| "logits/chosen": -2.9544167518615723, | |
| "logits/rejected": -2.8220763206481934, | |
| "logps/chosen": -245.1588134765625, | |
| "logps/rejected": -261.6687927246094, | |
| "loss": 0.1255, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -5.218487739562988, | |
| "rewards/margins": 6.255653381347656, | |
| "rewards/rejected": -11.474142074584961, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.1395027624309392e-07, | |
| "logits/chosen": -2.8086042404174805, | |
| "logits/rejected": -2.785409927368164, | |
| "logps/chosen": -253.0203857421875, | |
| "logps/rejected": -281.2375793457031, | |
| "loss": 0.16, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -6.011744976043701, | |
| "rewards/margins": 5.636991024017334, | |
| "rewards/rejected": -11.648736000061035, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 1.0704419889502763e-07, | |
| "logits/chosen": -2.9435603618621826, | |
| "logits/rejected": -2.7380692958831787, | |
| "logps/chosen": -261.493896484375, | |
| "logps/rejected": -278.2936096191406, | |
| "loss": 0.1174, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -4.648413181304932, | |
| "rewards/margins": 7.05194616317749, | |
| "rewards/rejected": -11.700358390808105, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.0013812154696132e-07, | |
| "logits/chosen": -2.9581801891326904, | |
| "logits/rejected": -2.8110265731811523, | |
| "logps/chosen": -244.45413208007812, | |
| "logps/rejected": -264.9123840332031, | |
| "loss": 0.1429, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -5.337543487548828, | |
| "rewards/margins": 6.183696746826172, | |
| "rewards/rejected": -11.521239280700684, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 9.323204419889502e-08, | |
| "logits/chosen": -2.905041217803955, | |
| "logits/rejected": -2.7901289463043213, | |
| "logps/chosen": -200.6727752685547, | |
| "logps/rejected": -228.93032836914062, | |
| "loss": 0.1408, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": -2.679917812347412, | |
| "rewards/margins": 6.9505295753479, | |
| "rewards/rejected": -9.630447387695312, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 8.632596685082872e-08, | |
| "logits/chosen": -2.856194257736206, | |
| "logits/rejected": -2.761082410812378, | |
| "logps/chosen": -218.3007049560547, | |
| "logps/rejected": -234.4198455810547, | |
| "loss": 0.1398, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -3.6605896949768066, | |
| "rewards/margins": 6.374427795410156, | |
| "rewards/rejected": -10.035017013549805, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 7.941988950276243e-08, | |
| "logits/chosen": -2.896955966949463, | |
| "logits/rejected": -2.786186933517456, | |
| "logps/chosen": -197.8193817138672, | |
| "logps/rejected": -214.23001098632812, | |
| "loss": 0.1442, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -4.441520690917969, | |
| "rewards/margins": 5.363109588623047, | |
| "rewards/rejected": -9.804631233215332, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 7.251381215469612e-08, | |
| "logits/chosen": -2.957908868789673, | |
| "logits/rejected": -2.8377201557159424, | |
| "logps/chosen": -270.6792907714844, | |
| "logps/rejected": -286.62213134765625, | |
| "loss": 0.1486, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -6.179241180419922, | |
| "rewards/margins": 6.042364597320557, | |
| "rewards/rejected": -12.22160530090332, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 6.560773480662984e-08, | |
| "logits/chosen": -2.774834156036377, | |
| "logits/rejected": -2.703329563140869, | |
| "logps/chosen": -202.3549041748047, | |
| "logps/rejected": -244.16653442382812, | |
| "loss": 0.1335, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -4.156593322753906, | |
| "rewards/margins": 6.949100494384766, | |
| "rewards/rejected": -11.105693817138672, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 5.870165745856354e-08, | |
| "logits/chosen": -2.8945531845092773, | |
| "logits/rejected": -2.7444348335266113, | |
| "logps/chosen": -190.70558166503906, | |
| "logps/rejected": -232.6694793701172, | |
| "loss": 0.1084, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": -2.4744460582733154, | |
| "rewards/margins": 8.159748077392578, | |
| "rewards/rejected": -10.634195327758789, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 5.1795580110497236e-08, | |
| "logits/chosen": -2.9436967372894287, | |
| "logits/rejected": -2.8356709480285645, | |
| "logps/chosen": -236.69949340820312, | |
| "logps/rejected": -253.46145629882812, | |
| "loss": 0.1855, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -4.680401802062988, | |
| "rewards/margins": 6.0413007736206055, | |
| "rewards/rejected": -10.72170352935791, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 4.488950276243094e-08, | |
| "logits/chosen": -2.7878644466400146, | |
| "logits/rejected": -2.745734930038452, | |
| "logps/chosen": -197.5469970703125, | |
| "logps/rejected": -246.12252807617188, | |
| "loss": 0.1106, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -3.669739246368408, | |
| "rewards/margins": 6.831077575683594, | |
| "rewards/rejected": -10.500818252563477, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 3.7983425414364637e-08, | |
| "logits/chosen": -2.918865203857422, | |
| "logits/rejected": -2.7714521884918213, | |
| "logps/chosen": -224.0564727783203, | |
| "logps/rejected": -241.874267578125, | |
| "loss": 0.1357, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -4.486175537109375, | |
| "rewards/margins": 6.323982238769531, | |
| "rewards/rejected": -10.810157775878906, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 3.107734806629834e-08, | |
| "logits/chosen": -2.789961338043213, | |
| "logits/rejected": -2.702083110809326, | |
| "logps/chosen": -197.3814697265625, | |
| "logps/rejected": -234.6428680419922, | |
| "loss": 0.1653, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.8847968578338623, | |
| "rewards/margins": 6.971263885498047, | |
| "rewards/rejected": -10.856060981750488, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.4171270718232044e-08, | |
| "logits/chosen": -2.886162042617798, | |
| "logits/rejected": -2.782167673110962, | |
| "logps/chosen": -251.7931365966797, | |
| "logps/rejected": -276.71856689453125, | |
| "loss": 0.1811, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -5.165857791900635, | |
| "rewards/margins": 6.922626495361328, | |
| "rewards/rejected": -12.088483810424805, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "learning_rate": 1.7265193370165747e-08, | |
| "logits/chosen": -2.998176336288452, | |
| "logits/rejected": -2.831453800201416, | |
| "logps/chosen": -238.40634155273438, | |
| "logps/rejected": -264.1795349121094, | |
| "loss": 0.1153, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -3.043548345565796, | |
| "rewards/margins": 8.052096366882324, | |
| "rewards/rejected": -11.0956449508667, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.0359116022099446e-08, | |
| "logits/chosen": -2.931680202484131, | |
| "logits/rejected": -2.758117198944092, | |
| "logps/chosen": -215.9856719970703, | |
| "logps/rejected": -238.5784912109375, | |
| "loss": 0.133, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -4.6980485916137695, | |
| "rewards/margins": 6.217514991760254, | |
| "rewards/rejected": -10.915563583374023, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 3.453038674033149e-09, | |
| "logits/chosen": -2.8354744911193848, | |
| "logits/rejected": -2.7321650981903076, | |
| "logps/chosen": -240.4857940673828, | |
| "logps/rejected": -271.0428771972656, | |
| "loss": 0.1495, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -3.8582940101623535, | |
| "rewards/margins": 7.768431186676025, | |
| "rewards/rejected": -11.626726150512695, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 805, | |
| "total_flos": 0.0, | |
| "train_loss": 0.18541636852003773, | |
| "train_runtime": 4261.3779, | |
| "train_samples_per_second": 3.022, | |
| "train_steps_per_second": 0.189 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 805, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |