{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.0, "eval_steps": 100, "global_step": 1650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.06060606060606e-10, "logits/generated": 2.699579954147339, "logits/real": 2.682819366455078, "logps/generated": -775.121826171875, "logps/real": -1241.9539794921875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.04, "learning_rate": 6.060606060606061e-09, "logits/generated": 3.0682497024536133, "logits/real": 2.809837818145752, "logps/generated": -734.121826171875, "logps/real": -1688.1424560546875, "loss": 2.421, "rewards/accuracies": 0.5555555820465088, "rewards/generated": -0.8136113286018372, "rewards/margins": 1.0536798238754272, "rewards/real": 0.2400684356689453, "step": 10 }, { "epoch": 0.07, "learning_rate": 1.2121212121212122e-08, "logits/generated": 3.1582841873168945, "logits/real": 2.793609142303467, "logps/generated": -701.7354736328125, "logps/real": -1495.578369140625, "loss": 2.7262, "rewards/accuracies": 0.6000000238418579, "rewards/generated": 0.00853276252746582, "rewards/margins": 1.969605803489685, "rewards/real": 1.9781383275985718, "step": 20 }, { "epoch": 0.11, "learning_rate": 1.818181818181818e-08, "logits/generated": 3.054560422897339, "logits/real": 2.7821545600891113, "logps/generated": -736.0550537109375, "logps/real": -1443.3416748046875, "loss": 2.3244, "rewards/accuracies": 0.5249999761581421, "rewards/generated": -0.5645670890808105, "rewards/margins": 0.8431296348571777, "rewards/real": 0.2785625457763672, "step": 30 }, { "epoch": 0.15, "learning_rate": 2.4242424242424243e-08, "logits/generated": 3.061432361602783, "logits/real": 2.788212299346924, "logps/generated": -740.2737426757812, "logps/real": -1564.3731689453125, "loss": 1.7391, "rewards/accuracies": 0.7749999761581421, "rewards/generated": -2.301443099975586, "rewards/margins": 5.458680629730225, "rewards/real": 3.157238006591797, "step": 40 }, { "epoch": 0.18, "learning_rate": 3.0303030303030305e-08, "logits/generated": 3.128068208694458, "logits/real": 2.797170400619507, "logps/generated": -744.0007934570312, "logps/real": -1659.2623291015625, "loss": 1.2213, "rewards/accuracies": 0.7250000238418579, "rewards/generated": -4.055088043212891, "rewards/margins": 6.465470790863037, "rewards/real": 2.4103832244873047, "step": 50 }, { "epoch": 0.22, "learning_rate": 3.636363636363636e-08, "logits/generated": 3.0427095890045166, "logits/real": 2.733898162841797, "logps/generated": -682.5880126953125, "logps/real": -1528.8931884765625, "loss": 0.7301, "rewards/accuracies": 0.7875000238418579, "rewards/generated": -6.884118556976318, "rewards/margins": 8.962200164794922, "rewards/real": 2.0780816078186035, "step": 60 }, { "epoch": 0.25, "learning_rate": 4.242424242424242e-08, "logits/generated": 3.080779552459717, "logits/real": 2.861422061920166, "logps/generated": -731.451904296875, "logps/real": -1573.796142578125, "loss": 0.5063, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -9.1003999710083, "rewards/margins": 14.506108283996582, "rewards/real": 5.405708312988281, "step": 70 }, { "epoch": 0.29, "learning_rate": 4.8484848484848486e-08, "logits/generated": 2.93912410736084, "logits/real": 2.864893913269043, "logps/generated": -688.7911376953125, "logps/real": -1399.1343994140625, "loss": 0.478, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -9.95896053314209, "rewards/margins": 13.613897323608398, "rewards/real": 3.654937744140625, "step": 80 }, { "epoch": 0.33, "learning_rate": 5.454545454545454e-08, "logits/generated": 2.843259811401367, "logits/real": 2.858530044555664, "logps/generated": -670.5985107421875, "logps/real": -1429.979736328125, "loss": 0.3533, "rewards/accuracies": 0.862500011920929, "rewards/generated": -10.419904708862305, "rewards/margins": 13.761541366577148, "rewards/real": 3.341637372970581, "step": 90 }, { "epoch": 0.36, "learning_rate": 6.060606060606061e-08, "logits/generated": 3.0854344367980957, "logits/real": 2.881145477294922, "logps/generated": -753.0096435546875, "logps/real": -1628.697998046875, "loss": 0.3451, "rewards/accuracies": 0.9375, "rewards/generated": -10.505245208740234, "rewards/margins": 15.42334270477295, "rewards/real": 4.918098449707031, "step": 100 }, { "epoch": 0.4, "learning_rate": 6.666666666666665e-08, "logits/generated": 2.9555041790008545, "logits/real": 2.828155755996704, "logps/generated": -725.7302856445312, "logps/real": -1499.3577880859375, "loss": 0.3466, "rewards/accuracies": 0.862500011920929, "rewards/generated": -11.467514991760254, "rewards/margins": 14.158941268920898, "rewards/real": 2.691427707672119, "step": 110 }, { "epoch": 0.44, "learning_rate": 7.272727272727273e-08, "logits/generated": 3.0568456649780273, "logits/real": 2.9170186519622803, "logps/generated": -724.0201416015625, "logps/real": -1427.0118408203125, "loss": 0.1936, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -11.029727935791016, "rewards/margins": 14.526552200317383, "rewards/real": 3.4968230724334717, "step": 120 }, { "epoch": 0.47, "learning_rate": 7.878787878787878e-08, "logits/generated": 3.027113914489746, "logits/real": 2.8878231048583984, "logps/generated": -697.5856323242188, "logps/real": -1266.6959228515625, "loss": 0.2656, "rewards/accuracies": 0.862500011920929, "rewards/generated": -14.533329963684082, "rewards/margins": 17.506813049316406, "rewards/real": 2.973484754562378, "step": 130 }, { "epoch": 0.51, "learning_rate": 8.484848484848484e-08, "logits/generated": 3.0132346153259277, "logits/real": 2.9195351600646973, "logps/generated": -725.3068237304688, "logps/real": -1509.488525390625, "loss": 0.2118, "rewards/accuracies": 0.949999988079071, "rewards/generated": -14.189427375793457, "rewards/margins": 18.903362274169922, "rewards/real": 4.7139387130737305, "step": 140 }, { "epoch": 0.55, "learning_rate": 9.09090909090909e-08, "logits/generated": 3.0432262420654297, "logits/real": 2.8000125885009766, "logps/generated": -706.8033447265625, "logps/real": -1340.9671630859375, "loss": 0.1454, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -16.990093231201172, "rewards/margins": 20.733434677124023, "rewards/real": 3.7433419227600098, "step": 150 }, { "epoch": 0.58, "learning_rate": 9.696969696969697e-08, "logits/generated": 3.036531448364258, "logits/real": 2.7710182666778564, "logps/generated": -750.1927490234375, "logps/real": -1476.255126953125, "loss": 0.0842, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -21.58728790283203, "rewards/margins": 28.88445472717285, "rewards/real": 7.297167778015137, "step": 160 }, { "epoch": 0.62, "learning_rate": 9.966329966329967e-08, "logits/generated": 3.0014750957489014, "logits/real": 2.911832332611084, "logps/generated": -711.2100830078125, "logps/real": -1511.9127197265625, "loss": 0.1629, "rewards/accuracies": 0.949999988079071, "rewards/generated": -18.818857192993164, "rewards/margins": 23.491788864135742, "rewards/real": 4.672932147979736, "step": 170 }, { "epoch": 0.65, "learning_rate": 9.898989898989899e-08, "logits/generated": 3.043483257293701, "logits/real": 2.851480007171631, "logps/generated": -774.1932373046875, "logps/real": -1405.355712890625, "loss": 0.1615, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -19.68424415588379, "rewards/margins": 25.7573184967041, "rewards/real": 6.07307243347168, "step": 180 }, { "epoch": 0.69, "learning_rate": 9.83164983164983e-08, "logits/generated": 3.1408205032348633, "logits/real": 2.865788221359253, "logps/generated": -766.6047973632812, "logps/real": -1503.375244140625, "loss": 0.0839, "rewards/accuracies": 0.987500011920929, "rewards/generated": -23.049453735351562, "rewards/margins": 30.902719497680664, "rewards/real": 7.853263854980469, "step": 190 }, { "epoch": 0.73, "learning_rate": 9.764309764309763e-08, "logits/generated": 2.928053617477417, "logits/real": 2.758157253265381, "logps/generated": -743.7946166992188, "logps/real": -1352.399169921875, "loss": 0.0354, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -22.094032287597656, "rewards/margins": 27.527359008789062, "rewards/real": 5.43332576751709, "step": 200 }, { "epoch": 0.76, "learning_rate": 9.696969696969697e-08, "logits/generated": 3.090585947036743, "logits/real": 2.7313692569732666, "logps/generated": -690.6846923828125, "logps/real": -1458.363037109375, "loss": 0.1122, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -20.866918563842773, "rewards/margins": 27.813827514648438, "rewards/real": 6.946908473968506, "step": 210 }, { "epoch": 0.8, "learning_rate": 9.629629629629629e-08, "logits/generated": 3.0646731853485107, "logits/real": 2.8153064250946045, "logps/generated": -755.1292724609375, "logps/real": -1506.2027587890625, "loss": 0.3571, "rewards/accuracies": 1.0, "rewards/generated": -22.05111312866211, "rewards/margins": 30.379648208618164, "rewards/real": 8.328536987304688, "step": 220 }, { "epoch": 0.84, "learning_rate": 9.562289562289561e-08, "logits/generated": 3.0094521045684814, "logits/real": 2.7294180393218994, "logps/generated": -737.7617797851562, "logps/real": -1406.5279541015625, "loss": 0.3359, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -22.222150802612305, "rewards/margins": 27.68829345703125, "rewards/real": 5.4661407470703125, "step": 230 }, { "epoch": 0.87, "learning_rate": 9.494949494949494e-08, "logits/generated": 3.0864949226379395, "logits/real": 2.9361164569854736, "logps/generated": -705.2486572265625, "logps/real": -1410.1124267578125, "loss": 0.0453, "rewards/accuracies": 0.987500011920929, "rewards/generated": -21.030681610107422, "rewards/margins": 28.817712783813477, "rewards/real": 7.787031650543213, "step": 240 }, { "epoch": 0.91, "learning_rate": 9.427609427609428e-08, "logits/generated": 3.0638632774353027, "logits/real": 2.7662625312805176, "logps/generated": -734.6624755859375, "logps/real": -1535.09423828125, "loss": 0.1326, "rewards/accuracies": 1.0, "rewards/generated": -25.62575340270996, "rewards/margins": 33.133296966552734, "rewards/real": 7.50754451751709, "step": 250 }, { "epoch": 0.95, "learning_rate": 9.36026936026936e-08, "logits/generated": 3.0285589694976807, "logits/real": 2.85501766204834, "logps/generated": -684.90283203125, "logps/real": -1474.42919921875, "loss": 0.0694, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.305273056030273, "rewards/margins": 30.4588565826416, "rewards/real": 8.153582572937012, "step": 260 }, { "epoch": 0.98, "learning_rate": 9.292929292929292e-08, "logits/generated": 3.028271436691284, "logits/real": 2.9165408611297607, "logps/generated": -702.5819702148438, "logps/real": -1456.512451171875, "loss": 0.0563, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.11782455444336, "rewards/margins": 34.10071563720703, "rewards/real": 6.982888698577881, "step": 270 }, { "epoch": 1.02, "learning_rate": 9.225589225589225e-08, "logits/generated": 2.9877219200134277, "logits/real": 2.856584072113037, "logps/generated": -709.028564453125, "logps/real": -1621.059814453125, "loss": 0.0752, "rewards/accuracies": 0.987500011920929, "rewards/generated": -22.911020278930664, "rewards/margins": 32.11205291748047, "rewards/real": 9.201033592224121, "step": 280 }, { "epoch": 1.05, "learning_rate": 9.158249158249159e-08, "logits/generated": 3.0530412197113037, "logits/real": 2.8289854526519775, "logps/generated": -695.4741821289062, "logps/real": -1409.147216796875, "loss": 0.0372, "rewards/accuracies": 1.0, "rewards/generated": -21.494949340820312, "rewards/margins": 29.098968505859375, "rewards/real": 7.604022026062012, "step": 290 }, { "epoch": 1.09, "learning_rate": 9.09090909090909e-08, "logits/generated": 3.0395150184631348, "logits/real": 2.8340461254119873, "logps/generated": -708.5654296875, "logps/real": -1284.200439453125, "loss": 0.0476, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.743383407592773, "rewards/margins": 33.827754974365234, "rewards/real": 8.084371566772461, "step": 300 }, { "epoch": 1.13, "learning_rate": 9.023569023569023e-08, "logits/generated": 3.1169886589050293, "logits/real": 2.8133883476257324, "logps/generated": -756.3373413085938, "logps/real": -1375.59033203125, "loss": 0.0717, "rewards/accuracies": 1.0, "rewards/generated": -28.533214569091797, "rewards/margins": 37.33450698852539, "rewards/real": 8.801285743713379, "step": 310 }, { "epoch": 1.16, "learning_rate": 8.956228956228956e-08, "logits/generated": 2.9686315059661865, "logits/real": 2.896556854248047, "logps/generated": -742.1793212890625, "logps/real": -1546.449951171875, "loss": 0.062, "rewards/accuracies": 1.0, "rewards/generated": -27.933496475219727, "rewards/margins": 37.03893280029297, "rewards/real": 9.105436325073242, "step": 320 }, { "epoch": 1.2, "learning_rate": 8.888888888888888e-08, "logits/generated": 3.1490354537963867, "logits/real": 2.734900712966919, "logps/generated": -755.2977905273438, "logps/real": -1635.7431640625, "loss": 0.0473, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -29.23895263671875, "rewards/margins": 39.24039077758789, "rewards/real": 10.001440048217773, "step": 330 }, { "epoch": 1.24, "learning_rate": 8.821548821548821e-08, "logits/generated": 3.0037903785705566, "logits/real": 2.79923415184021, "logps/generated": -709.5477294921875, "logps/real": -1330.9671630859375, "loss": 0.0213, "rewards/accuracies": 1.0, "rewards/generated": -26.20556640625, "rewards/margins": 34.81034851074219, "rewards/real": 8.60478401184082, "step": 340 }, { "epoch": 1.27, "learning_rate": 8.754208754208754e-08, "logits/generated": 3.069556713104248, "logits/real": 2.879063129425049, "logps/generated": -722.0208740234375, "logps/real": -1506.7451171875, "loss": 0.0125, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.53557777404785, "rewards/margins": 42.85404968261719, "rewards/real": 12.318475723266602, "step": 350 }, { "epoch": 1.31, "learning_rate": 8.686868686868686e-08, "logits/generated": 3.0302517414093018, "logits/real": 2.8793983459472656, "logps/generated": -701.7003173828125, "logps/real": -1694.4791259765625, "loss": 0.0239, "rewards/accuracies": 0.987500011920929, "rewards/generated": -25.19091796875, "rewards/margins": 37.82417678833008, "rewards/real": 12.633260726928711, "step": 360 }, { "epoch": 1.35, "learning_rate": 8.619528619528619e-08, "logits/generated": 3.0597193241119385, "logits/real": 2.807206392288208, "logps/generated": -761.3441772460938, "logps/real": -1326.437744140625, "loss": 0.0312, "rewards/accuracies": 1.0, "rewards/generated": -25.752056121826172, "rewards/margins": 35.21023941040039, "rewards/real": 9.458181381225586, "step": 370 }, { "epoch": 1.38, "learning_rate": 8.552188552188552e-08, "logits/generated": 3.0551209449768066, "logits/real": 2.7823100090026855, "logps/generated": -704.1209106445312, "logps/real": -1315.218505859375, "loss": 0.0369, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -25.932231903076172, "rewards/margins": 34.08350372314453, "rewards/real": 8.151269912719727, "step": 380 }, { "epoch": 1.42, "learning_rate": 8.484848484848484e-08, "logits/generated": 3.0843758583068848, "logits/real": 2.7782723903656006, "logps/generated": -705.9140625, "logps/real": -1448.736572265625, "loss": 0.0046, "rewards/accuracies": 0.987500011920929, "rewards/generated": -26.40155029296875, "rewards/margins": 37.458251953125, "rewards/real": 11.05670166015625, "step": 390 }, { "epoch": 1.45, "learning_rate": 8.417508417508418e-08, "logits/generated": 3.11004900932312, "logits/real": 2.744499444961548, "logps/generated": -691.4265747070312, "logps/real": -1582.999755859375, "loss": 0.0701, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -25.861703872680664, "rewards/margins": 36.900672912597656, "rewards/real": 11.038970947265625, "step": 400 }, { "epoch": 1.49, "learning_rate": 8.35016835016835e-08, "logits/generated": 3.0825066566467285, "logits/real": 2.856200695037842, "logps/generated": -752.9417724609375, "logps/real": -1431.7108154296875, "loss": 0.0384, "rewards/accuracies": 1.0, "rewards/generated": -29.039600372314453, "rewards/margins": 39.49211502075195, "rewards/real": 10.452515602111816, "step": 410 }, { "epoch": 1.53, "learning_rate": 8.282828282828282e-08, "logits/generated": 3.0164382457733154, "logits/real": 2.8686347007751465, "logps/generated": -746.9415283203125, "logps/real": -1504.063232421875, "loss": 0.0288, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.603206634521484, "rewards/margins": 42.40104675292969, "rewards/real": 11.797839164733887, "step": 420 }, { "epoch": 1.56, "learning_rate": 8.215488215488215e-08, "logits/generated": 2.9895148277282715, "logits/real": 2.7924630641937256, "logps/generated": -732.4207763671875, "logps/real": -1519.8125, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/generated": -28.783283233642578, "rewards/margins": 40.31874465942383, "rewards/real": 11.535463333129883, "step": 430 }, { "epoch": 1.6, "learning_rate": 8.148148148148148e-08, "logits/generated": 2.9808883666992188, "logits/real": 2.835967540740967, "logps/generated": -704.5755004882812, "logps/real": -1317.364501953125, "loss": 0.0448, "rewards/accuracies": 0.987500011920929, "rewards/generated": -29.565563201904297, "rewards/margins": 40.2669677734375, "rewards/real": 10.70140266418457, "step": 440 }, { "epoch": 1.64, "learning_rate": 8.08080808080808e-08, "logits/generated": 3.079317569732666, "logits/real": 2.793091297149658, "logps/generated": -761.6934814453125, "logps/real": -1553.8653564453125, "loss": 0.0587, "rewards/accuracies": 1.0, "rewards/generated": -29.624114990234375, "rewards/margins": 42.62477493286133, "rewards/real": 13.00065803527832, "step": 450 }, { "epoch": 1.67, "learning_rate": 8.013468013468013e-08, "logits/generated": 3.0111546516418457, "logits/real": 2.84384822845459, "logps/generated": -705.1884765625, "logps/real": -1358.9456787109375, "loss": 0.0406, "rewards/accuracies": 1.0, "rewards/generated": -27.007949829101562, "rewards/margins": 38.559513092041016, "rewards/real": 11.551559448242188, "step": 460 }, { "epoch": 1.71, "learning_rate": 7.946127946127946e-08, "logits/generated": 3.075064182281494, "logits/real": 2.7784323692321777, "logps/generated": -721.3555297851562, "logps/real": -1506.717041015625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -29.36679458618164, "rewards/margins": 42.57081985473633, "rewards/real": 13.20402717590332, "step": 470 }, { "epoch": 1.75, "learning_rate": 7.878787878787878e-08, "logits/generated": 3.117192506790161, "logits/real": 2.881643772125244, "logps/generated": -721.9058227539062, "logps/real": -1640.3363037109375, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/generated": -28.368335723876953, "rewards/margins": 42.70831298828125, "rewards/real": 14.339981079101562, "step": 480 }, { "epoch": 1.78, "learning_rate": 7.811447811447811e-08, "logits/generated": 3.0311577320098877, "logits/real": 2.858957529067993, "logps/generated": -699.8917846679688, "logps/real": -1541.83642578125, "loss": 0.0314, "rewards/accuracies": 1.0, "rewards/generated": -29.87860679626465, "rewards/margins": 43.26655960083008, "rewards/real": 13.387951850891113, "step": 490 }, { "epoch": 1.82, "learning_rate": 7.744107744107744e-08, "logits/generated": 3.1190757751464844, "logits/real": 2.8085410594940186, "logps/generated": -718.1214599609375, "logps/real": -1486.966796875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -23.88811683654785, "rewards/margins": 37.78776168823242, "rewards/real": 13.89964771270752, "step": 500 }, { "epoch": 1.85, "learning_rate": 7.676767676767677e-08, "logits/generated": 2.9580323696136475, "logits/real": 2.816072463989258, "logps/generated": -748.8839111328125, "logps/real": -1491.2899169921875, "loss": 0.0236, "rewards/accuracies": 1.0, "rewards/generated": -28.695343017578125, "rewards/margins": 40.448646545410156, "rewards/real": 11.75330638885498, "step": 510 }, { "epoch": 1.89, "learning_rate": 7.609427609427609e-08, "logits/generated": 3.0777218341827393, "logits/real": 2.777287006378174, "logps/generated": -762.1123046875, "logps/real": -1536.8941650390625, "loss": 0.0304, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.0263729095459, "rewards/margins": 45.39332962036133, "rewards/real": 14.36695384979248, "step": 520 }, { "epoch": 1.93, "learning_rate": 7.542087542087542e-08, "logits/generated": 3.0767197608947754, "logits/real": 2.95578670501709, "logps/generated": -750.724609375, "logps/real": -1250.8936767578125, "loss": 0.0152, "rewards/accuracies": 1.0, "rewards/generated": -29.208332061767578, "rewards/margins": 39.577842712402344, "rewards/real": 10.369508743286133, "step": 530 }, { "epoch": 1.96, "learning_rate": 7.474747474747475e-08, "logits/generated": 3.0988030433654785, "logits/real": 2.9259018898010254, "logps/generated": -730.708740234375, "logps/real": -1310.5799560546875, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/generated": -31.4479923248291, "rewards/margins": 42.12135314941406, "rewards/real": 10.673360824584961, "step": 540 }, { "epoch": 2.0, "learning_rate": 7.407407407407407e-08, "logits/generated": 3.0401549339294434, "logits/real": 2.793870210647583, "logps/generated": -733.7213745117188, "logps/real": -1337.715576171875, "loss": 0.0088, "rewards/accuracies": 1.0, "rewards/generated": -28.995868682861328, "rewards/margins": 39.94435501098633, "rewards/real": 10.948486328125, "step": 550 }, { "epoch": 2.04, "learning_rate": 7.34006734006734e-08, "logits/generated": 3.056553602218628, "logits/real": 2.7884602546691895, "logps/generated": -688.52001953125, "logps/real": -1462.112548828125, "loss": 0.0177, "rewards/accuracies": 0.987500011920929, "rewards/generated": -31.7587833404541, "rewards/margins": 46.42646026611328, "rewards/real": 14.667678833007812, "step": 560 }, { "epoch": 2.07, "learning_rate": 7.272727272727273e-08, "logits/generated": 3.112514019012451, "logits/real": 2.907961130142212, "logps/generated": -751.2688598632812, "logps/real": -1407.45703125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/generated": -31.8891658782959, "rewards/margins": 46.06678009033203, "rewards/real": 14.177614212036133, "step": 570 }, { "epoch": 2.11, "learning_rate": 7.205387205387205e-08, "logits/generated": 3.0566983222961426, "logits/real": 2.923910617828369, "logps/generated": -701.1279907226562, "logps/real": -1302.158935546875, "loss": 0.0215, "rewards/accuracies": 0.987500011920929, "rewards/generated": -30.777050018310547, "rewards/margins": 43.49699401855469, "rewards/real": 12.719941139221191, "step": 580 }, { "epoch": 2.15, "learning_rate": 7.138047138047138e-08, "logits/generated": 3.0059492588043213, "logits/real": 2.8256354331970215, "logps/generated": -723.5939331054688, "logps/real": -1454.8687744140625, "loss": 0.0279, "rewards/accuracies": 0.987500011920929, "rewards/generated": -33.424068450927734, "rewards/margins": 49.1785888671875, "rewards/real": 15.754518508911133, "step": 590 }, { "epoch": 2.18, "learning_rate": 7.070707070707071e-08, "logits/generated": 2.9487192630767822, "logits/real": 2.847012758255005, "logps/generated": -668.1467895507812, "logps/real": -1324.0634765625, "loss": 0.0165, "rewards/accuracies": 0.987500011920929, "rewards/generated": -32.96076202392578, "rewards/margins": 45.69402313232422, "rewards/real": 12.733263969421387, "step": 600 }, { "epoch": 2.22, "learning_rate": 7.003367003367003e-08, "logits/generated": 3.067147970199585, "logits/real": 2.8171210289001465, "logps/generated": -717.472412109375, "logps/real": -1411.53125, "loss": 0.0115, "rewards/accuracies": 1.0, "rewards/generated": -27.626388549804688, "rewards/margins": 42.88236618041992, "rewards/real": 15.255979537963867, "step": 610 }, { "epoch": 2.25, "learning_rate": 6.936026936026935e-08, "logits/generated": 3.088181734085083, "logits/real": 2.812743663787842, "logps/generated": -698.3175048828125, "logps/real": -1480.5269775390625, "loss": 0.0209, "rewards/accuracies": 0.987500011920929, "rewards/generated": -28.876567840576172, "rewards/margins": 42.66175079345703, "rewards/real": 13.785179138183594, "step": 620 }, { "epoch": 2.29, "learning_rate": 6.868686868686869e-08, "logits/generated": 3.010751962661743, "logits/real": 2.9585797786712646, "logps/generated": -677.6873779296875, "logps/real": -1270.915283203125, "loss": 0.0702, "rewards/accuracies": 0.987500011920929, "rewards/generated": -27.593402862548828, "rewards/margins": 41.15986633300781, "rewards/real": 13.566459655761719, "step": 630 }, { "epoch": 2.33, "learning_rate": 6.801346801346801e-08, "logits/generated": 3.0732274055480957, "logits/real": 2.7747623920440674, "logps/generated": -680.6365966796875, "logps/real": -1507.0126953125, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -32.45772171020508, "rewards/margins": 49.80105972290039, "rewards/real": 17.34333610534668, "step": 640 }, { "epoch": 2.36, "learning_rate": 6.734006734006734e-08, "logits/generated": 3.1298694610595703, "logits/real": 2.8429999351501465, "logps/generated": -751.9732666015625, "logps/real": -1524.2716064453125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -36.8809814453125, "rewards/margins": 56.35636520385742, "rewards/real": 19.475383758544922, "step": 650 }, { "epoch": 2.4, "learning_rate": 6.666666666666665e-08, "logits/generated": 3.0035860538482666, "logits/real": 2.8745462894439697, "logps/generated": -748.1893310546875, "logps/real": -1361.515380859375, "loss": 0.0325, "rewards/accuracies": 1.0, "rewards/generated": -35.40471649169922, "rewards/margins": 49.71015167236328, "rewards/real": 14.305438041687012, "step": 660 }, { "epoch": 2.44, "learning_rate": 6.5993265993266e-08, "logits/generated": 3.088907480239868, "logits/real": 2.7869341373443604, "logps/generated": -756.3989868164062, "logps/real": -1387.8505859375, "loss": 0.0124, "rewards/accuracies": 1.0, "rewards/generated": -31.784137725830078, "rewards/margins": 47.05036544799805, "rewards/real": 15.26623249053955, "step": 670 }, { "epoch": 2.47, "learning_rate": 6.531986531986532e-08, "logits/generated": 3.078676462173462, "logits/real": 2.87841796875, "logps/generated": -718.3302001953125, "logps/real": -1401.547607421875, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/generated": -34.85867691040039, "rewards/margins": 48.483741760253906, "rewards/real": 13.62506103515625, "step": 680 }, { "epoch": 2.51, "learning_rate": 6.464646464646465e-08, "logits/generated": 3.066138744354248, "logits/real": 2.7867319583892822, "logps/generated": -774.26220703125, "logps/real": -1429.322509765625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/generated": -38.60667419433594, "rewards/margins": 53.43706512451172, "rewards/real": 14.83039665222168, "step": 690 }, { "epoch": 2.55, "learning_rate": 6.397306397306396e-08, "logits/generated": 2.9843876361846924, "logits/real": 2.8095316886901855, "logps/generated": -769.455322265625, "logps/real": -1340.113525390625, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/generated": -35.12670135498047, "rewards/margins": 51.223480224609375, "rewards/real": 16.096778869628906, "step": 700 }, { "epoch": 2.58, "learning_rate": 6.32996632996633e-08, "logits/generated": 3.0770888328552246, "logits/real": 2.7803187370300293, "logps/generated": -773.3048095703125, "logps/real": -1606.997802734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/generated": -32.44374465942383, "rewards/margins": 52.67897415161133, "rewards/real": 20.2352294921875, "step": 710 }, { "epoch": 2.62, "learning_rate": 6.262626262626263e-08, "logits/generated": 3.0355000495910645, "logits/real": 2.872314214706421, "logps/generated": -695.8619995117188, "logps/real": -1385.9244384765625, "loss": 0.001, "rewards/accuracies": 1.0, "rewards/generated": -29.878408432006836, "rewards/margins": 43.25067138671875, "rewards/real": 13.372262954711914, "step": 720 }, { "epoch": 2.65, "learning_rate": 6.195286195286194e-08, "logits/generated": 3.036618709564209, "logits/real": 2.7703795433044434, "logps/generated": -718.2050170898438, "logps/real": -1526.0164794921875, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/generated": -30.98556137084961, "rewards/margins": 50.902225494384766, "rewards/real": 19.916662216186523, "step": 730 }, { "epoch": 2.69, "learning_rate": 6.127946127946127e-08, "logits/generated": 3.1282575130462646, "logits/real": 2.9719161987304688, "logps/generated": -742.7721557617188, "logps/real": -1431.595947265625, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/generated": -30.542556762695312, "rewards/margins": 48.44450759887695, "rewards/real": 17.901947021484375, "step": 740 }, { "epoch": 2.73, "learning_rate": 6.060606060606061e-08, "logits/generated": 3.210226535797119, "logits/real": 2.841000556945801, "logps/generated": -748.2347412109375, "logps/real": -1713.9771728515625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -33.95073699951172, "rewards/margins": 54.87969970703125, "rewards/real": 20.928958892822266, "step": 750 }, { "epoch": 2.76, "learning_rate": 5.993265993265994e-08, "logits/generated": 3.043410062789917, "logits/real": 2.901235818862915, "logps/generated": -690.8917846679688, "logps/real": -1381.51416015625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -34.603294372558594, "rewards/margins": 52.166481018066406, "rewards/real": 17.56318473815918, "step": 760 }, { "epoch": 2.8, "learning_rate": 5.925925925925925e-08, "logits/generated": 3.0491907596588135, "logits/real": 2.9479236602783203, "logps/generated": -754.4249877929688, "logps/real": -1335.704833984375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -34.789825439453125, "rewards/margins": 48.579429626464844, "rewards/real": 13.789602279663086, "step": 770 }, { "epoch": 2.84, "learning_rate": 5.8585858585858584e-08, "logits/generated": 3.1674771308898926, "logits/real": 2.8359758853912354, "logps/generated": -709.5343627929688, "logps/real": -1627.704833984375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -32.5056037902832, "rewards/margins": 52.365699768066406, "rewards/real": 19.860095977783203, "step": 780 }, { "epoch": 2.87, "learning_rate": 5.791245791245791e-08, "logits/generated": 3.0346832275390625, "logits/real": 2.8684706687927246, "logps/generated": -695.8799438476562, "logps/real": -1270.2745361328125, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.13224792480469, "rewards/margins": 52.627349853515625, "rewards/real": 13.495101928710938, "step": 790 }, { "epoch": 2.91, "learning_rate": 5.723905723905723e-08, "logits/generated": 3.1303420066833496, "logits/real": 2.7591614723205566, "logps/generated": -730.6192626953125, "logps/real": -1435.2237548828125, "loss": 0.0094, "rewards/accuracies": 1.0, "rewards/generated": -37.35695266723633, "rewards/margins": 52.618690490722656, "rewards/real": 15.261739730834961, "step": 800 }, { "epoch": 2.95, "learning_rate": 5.6565656565656564e-08, "logits/generated": 3.1085684299468994, "logits/real": 2.9068782329559326, "logps/generated": -775.0628662109375, "logps/real": -1592.507080078125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -34.874488830566406, "rewards/margins": 54.11597442626953, "rewards/real": 19.241479873657227, "step": 810 }, { "epoch": 2.98, "learning_rate": 5.589225589225589e-08, "logits/generated": 3.2035202980041504, "logits/real": 2.7201569080352783, "logps/generated": -680.8076171875, "logps/real": -1521.5247802734375, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/generated": -33.13090133666992, "rewards/margins": 51.3615837097168, "rewards/real": 18.230682373046875, "step": 820 }, { "epoch": 3.02, "learning_rate": 5.521885521885522e-08, "logits/generated": 3.086456060409546, "logits/real": 2.9254355430603027, "logps/generated": -700.845703125, "logps/real": -1483.135986328125, "loss": 0.0101, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -34.048065185546875, "rewards/margins": 49.319061279296875, "rewards/real": 15.27099609375, "step": 830 }, { "epoch": 3.05, "learning_rate": 5.454545454545454e-08, "logits/generated": 3.1234586238861084, "logits/real": 2.939340353012085, "logps/generated": -743.561279296875, "logps/real": -1300.602294921875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -41.906612396240234, "rewards/margins": 56.99538040161133, "rewards/real": 15.088765144348145, "step": 840 }, { "epoch": 3.09, "learning_rate": 5.387205387205387e-08, "logits/generated": 2.9916865825653076, "logits/real": 2.976253032684326, "logps/generated": -734.1006469726562, "logps/real": -1259.8819580078125, "loss": 0.0059, "rewards/accuracies": 0.987500011920929, "rewards/generated": -41.373931884765625, "rewards/margins": 54.366546630859375, "rewards/real": 12.992612838745117, "step": 850 }, { "epoch": 3.13, "learning_rate": 5.31986531986532e-08, "logits/generated": 3.0443127155303955, "logits/real": 2.925724744796753, "logps/generated": -741.2023315429688, "logps/real": -1654.55078125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -37.850894927978516, "rewards/margins": 56.64594268798828, "rewards/real": 18.795047760009766, "step": 860 }, { "epoch": 3.16, "learning_rate": 5.2525252525252525e-08, "logits/generated": 3.144395351409912, "logits/real": 2.7978408336639404, "logps/generated": -741.2575073242188, "logps/real": -1477.39501953125, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -39.5072135925293, "rewards/margins": 55.39802932739258, "rewards/real": 15.890813827514648, "step": 870 }, { "epoch": 3.2, "learning_rate": 5.1851851851851846e-08, "logits/generated": 3.062936544418335, "logits/real": 2.8950228691101074, "logps/generated": -675.0447387695312, "logps/real": -1391.226806640625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -39.9172248840332, "rewards/margins": 54.84810256958008, "rewards/real": 14.930872917175293, "step": 880 }, { "epoch": 3.24, "learning_rate": 5.117845117845118e-08, "logits/generated": 3.15445876121521, "logits/real": 2.8755147457122803, "logps/generated": -652.5638427734375, "logps/real": -1479.97216796875, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -34.85411834716797, "rewards/margins": 54.70256805419922, "rewards/real": 19.848445892333984, "step": 890 }, { "epoch": 3.27, "learning_rate": 5.0505050505050506e-08, "logits/generated": 3.0380635261535645, "logits/real": 2.871950626373291, "logps/generated": -730.4390869140625, "logps/real": -1575.0455322265625, "loss": 0.0025, "rewards/accuracies": 1.0, "rewards/generated": -33.577415466308594, "rewards/margins": 51.80384063720703, "rewards/real": 18.226425170898438, "step": 900 }, { "epoch": 3.31, "learning_rate": 4.983164983164983e-08, "logits/generated": 3.0305845737457275, "logits/real": 2.8189616203308105, "logps/generated": -743.3963623046875, "logps/real": -1461.7225341796875, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -38.53675079345703, "rewards/margins": 57.3032341003418, "rewards/real": 18.766483306884766, "step": 910 }, { "epoch": 3.35, "learning_rate": 4.915824915824915e-08, "logits/generated": 2.959317684173584, "logits/real": 2.880563974380493, "logps/generated": -734.4588623046875, "logps/real": -1528.0472412109375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -37.20275115966797, "rewards/margins": 57.61243438720703, "rewards/real": 20.409683227539062, "step": 920 }, { "epoch": 3.38, "learning_rate": 4.8484848484848486e-08, "logits/generated": 3.0965051651000977, "logits/real": 2.9035251140594482, "logps/generated": -740.6205444335938, "logps/real": -1542.7652587890625, "loss": 0.0018, "rewards/accuracies": 1.0, "rewards/generated": -38.03114318847656, "rewards/margins": 58.14781951904297, "rewards/real": 20.116676330566406, "step": 930 }, { "epoch": 3.42, "learning_rate": 4.781144781144781e-08, "logits/generated": 3.0000622272491455, "logits/real": 2.935492992401123, "logps/generated": -752.5732421875, "logps/real": -1522.435546875, "loss": 0.0016, "rewards/accuracies": 1.0, "rewards/generated": -40.24225616455078, "rewards/margins": 59.14085006713867, "rewards/real": 18.898588180541992, "step": 940 }, { "epoch": 3.45, "learning_rate": 4.713804713804714e-08, "logits/generated": 3.075613498687744, "logits/real": 2.865962266921997, "logps/generated": -740.7411499023438, "logps/real": -1492.5826416015625, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -37.88569259643555, "rewards/margins": 59.29375076293945, "rewards/real": 21.40806007385254, "step": 950 }, { "epoch": 3.49, "learning_rate": 4.646464646464646e-08, "logits/generated": 3.1555328369140625, "logits/real": 2.798086404800415, "logps/generated": -706.0817260742188, "logps/real": -1535.33544921875, "loss": 0.004, "rewards/accuracies": 1.0, "rewards/generated": -39.4811897277832, "rewards/margins": 60.82379913330078, "rewards/real": 21.342609405517578, "step": 960 }, { "epoch": 3.53, "learning_rate": 4.5791245791245794e-08, "logits/generated": 3.081317186355591, "logits/real": 2.845825672149658, "logps/generated": -730.57373046875, "logps/real": -1374.495361328125, "loss": 0.0108, "rewards/accuracies": 1.0, "rewards/generated": -39.06752395629883, "rewards/margins": 56.50775146484375, "rewards/real": 17.440229415893555, "step": 970 }, { "epoch": 3.56, "learning_rate": 4.5117845117845114e-08, "logits/generated": 3.154078960418701, "logits/real": 2.910524845123291, "logps/generated": -753.1373901367188, "logps/real": -1561.719482421875, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -43.68804931640625, "rewards/margins": 63.04149627685547, "rewards/real": 19.35345458984375, "step": 980 }, { "epoch": 3.6, "learning_rate": 4.444444444444444e-08, "logits/generated": 3.032217025756836, "logits/real": 2.9035446643829346, "logps/generated": -710.7857666015625, "logps/real": -1517.644775390625, "loss": 0.0048, "rewards/accuracies": 0.987500011920929, "rewards/generated": -36.37682342529297, "rewards/margins": 54.066917419433594, "rewards/real": 17.690086364746094, "step": 990 }, { "epoch": 3.64, "learning_rate": 4.377104377104377e-08, "logits/generated": 3.0784573554992676, "logits/real": 2.8150768280029297, "logps/generated": -695.4595947265625, "logps/real": -1281.703125, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -39.671722412109375, "rewards/margins": 54.309326171875, "rewards/real": 14.637606620788574, "step": 1000 }, { "epoch": 3.67, "learning_rate": 4.3097643097643095e-08, "logits/generated": 3.1063504219055176, "logits/real": 2.8608155250549316, "logps/generated": -692.5540161132812, "logps/real": -1632.0693359375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -34.622886657714844, "rewards/margins": 56.013877868652344, "rewards/real": 21.390993118286133, "step": 1010 }, { "epoch": 3.71, "learning_rate": 4.242424242424242e-08, "logits/generated": 3.0541396141052246, "logits/real": 2.6461181640625, "logps/generated": -721.364013671875, "logps/real": -1407.12548828125, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -43.80752182006836, "rewards/margins": 63.071189880371094, "rewards/real": 19.263675689697266, "step": 1020 }, { "epoch": 3.75, "learning_rate": 4.175084175084175e-08, "logits/generated": 2.9913887977600098, "logits/real": 2.934549331665039, "logps/generated": -695.5123291015625, "logps/real": -1403.131103515625, "loss": 0.0037, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.6518669128418, "rewards/margins": 57.8861198425293, "rewards/real": 18.234256744384766, "step": 1030 }, { "epoch": 3.78, "learning_rate": 4.1077441077441075e-08, "logits/generated": 2.991410493850708, "logits/real": 2.88810658454895, "logps/generated": -705.2049560546875, "logps/real": -1338.635986328125, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -41.076866149902344, "rewards/margins": 58.57453536987305, "rewards/real": 17.497663497924805, "step": 1040 }, { "epoch": 3.82, "learning_rate": 4.04040404040404e-08, "logits/generated": 3.1155622005462646, "logits/real": 2.835115909576416, "logps/generated": -680.0533447265625, "logps/real": -1494.996826171875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -39.507102966308594, "rewards/margins": 57.86688995361328, "rewards/real": 18.359786987304688, "step": 1050 }, { "epoch": 3.85, "learning_rate": 3.973063973063973e-08, "logits/generated": 2.9895262718200684, "logits/real": 2.7560791969299316, "logps/generated": -739.0970458984375, "logps/real": -1501.715576171875, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -46.34663391113281, "rewards/margins": 64.84639739990234, "rewards/real": 18.499757766723633, "step": 1060 }, { "epoch": 3.89, "learning_rate": 3.9057239057239056e-08, "logits/generated": 3.105269432067871, "logits/real": 2.8226687908172607, "logps/generated": -756.6768798828125, "logps/real": -1623.533447265625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -37.96059036254883, "rewards/margins": 59.31414031982422, "rewards/real": 21.353551864624023, "step": 1070 }, { "epoch": 3.93, "learning_rate": 3.838383838383838e-08, "logits/generated": 3.191237211227417, "logits/real": 2.8417656421661377, "logps/generated": -751.603759765625, "logps/real": -1454.302490234375, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -42.773094177246094, "rewards/margins": 60.9084358215332, "rewards/real": 18.13533592224121, "step": 1080 }, { "epoch": 3.96, "learning_rate": 3.771043771043771e-08, "logits/generated": 3.114577531814575, "logits/real": 2.928928852081299, "logps/generated": -731.4918823242188, "logps/real": -1369.5748291015625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -42.268646240234375, "rewards/margins": 58.8697509765625, "rewards/real": 16.601106643676758, "step": 1090 }, { "epoch": 4.0, "learning_rate": 3.7037037037037036e-08, "logits/generated": 2.96974778175354, "logits/real": 2.8065786361694336, "logps/generated": -704.4192504882812, "logps/real": -1559.37255859375, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -34.664466857910156, "rewards/margins": 55.423606872558594, "rewards/real": 20.759136199951172, "step": 1100 }, { "epoch": 4.04, "learning_rate": 3.636363636363636e-08, "logits/generated": 3.086235284805298, "logits/real": 2.8906631469726562, "logps/generated": -715.7879638671875, "logps/real": -1566.2054443359375, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -43.6541748046875, "rewards/margins": 62.014198303222656, "rewards/real": 18.360017776489258, "step": 1110 }, { "epoch": 4.07, "learning_rate": 3.569023569023569e-08, "logits/generated": 3.0685572624206543, "logits/real": 2.875532627105713, "logps/generated": -737.6215209960938, "logps/real": -1280.81591796875, "loss": 0.0007, "rewards/accuracies": 1.0, "rewards/generated": -42.01698684692383, "rewards/margins": 59.42559814453125, "rewards/real": 17.408613204956055, "step": 1120 }, { "epoch": 4.11, "learning_rate": 3.501683501683502e-08, "logits/generated": 3.167466402053833, "logits/real": 2.8821394443511963, "logps/generated": -693.0511474609375, "logps/real": -1415.286865234375, "loss": 0.0032, "rewards/accuracies": 1.0, "rewards/generated": -36.813697814941406, "rewards/margins": 54.748443603515625, "rewards/real": 17.934749603271484, "step": 1130 }, { "epoch": 4.15, "learning_rate": 3.4343434343434344e-08, "logits/generated": 3.150578260421753, "logits/real": 2.941450595855713, "logps/generated": -733.4342651367188, "logps/real": -1328.4635009765625, "loss": 0.0011, "rewards/accuracies": 1.0, "rewards/generated": -43.951080322265625, "rewards/margins": 59.140625, "rewards/real": 15.189538955688477, "step": 1140 }, { "epoch": 4.18, "learning_rate": 3.367003367003367e-08, "logits/generated": 3.0381405353546143, "logits/real": 2.7518515586853027, "logps/generated": -759.1390380859375, "logps/real": -1387.8656005859375, "loss": 0.0014, "rewards/accuracies": 1.0, "rewards/generated": -44.51789093017578, "rewards/margins": 59.31587600708008, "rewards/real": 14.797983169555664, "step": 1150 }, { "epoch": 4.22, "learning_rate": 3.2996632996633e-08, "logits/generated": 3.148815870285034, "logits/real": 2.8494136333465576, "logps/generated": -718.7544555664062, "logps/real": -1469.125732421875, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -43.554443359375, "rewards/margins": 60.92546463012695, "rewards/real": 17.371023178100586, "step": 1160 }, { "epoch": 4.25, "learning_rate": 3.2323232323232324e-08, "logits/generated": 3.0761420726776123, "logits/real": 2.8544418811798096, "logps/generated": -739.6768798828125, "logps/real": -1545.593017578125, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -46.87248992919922, "rewards/margins": 65.18719482421875, "rewards/real": 18.31471061706543, "step": 1170 }, { "epoch": 4.29, "learning_rate": 3.164983164983165e-08, "logits/generated": 3.008000135421753, "logits/real": 2.7431397438049316, "logps/generated": -684.0732421875, "logps/real": -1354.230224609375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -39.17574691772461, "rewards/margins": 54.674102783203125, "rewards/real": 15.498357772827148, "step": 1180 }, { "epoch": 4.33, "learning_rate": 3.097643097643097e-08, "logits/generated": 3.0173258781433105, "logits/real": 2.8204867839813232, "logps/generated": -706.6377563476562, "logps/real": -1528.116455078125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -40.873321533203125, "rewards/margins": 60.62104034423828, "rewards/real": 19.74771499633789, "step": 1190 }, { "epoch": 4.36, "learning_rate": 3.0303030303030305e-08, "logits/generated": 3.1523802280426025, "logits/real": 2.847137212753296, "logps/generated": -749.1297607421875, "logps/real": -1355.4627685546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -47.15817642211914, "rewards/margins": 63.498390197753906, "rewards/real": 16.340208053588867, "step": 1200 }, { "epoch": 4.4, "learning_rate": 2.9629629629629625e-08, "logits/generated": 3.133452892303467, "logits/real": 2.862262487411499, "logps/generated": -781.4495239257812, "logps/real": -1706.143310546875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -48.15586471557617, "rewards/margins": 68.83937072753906, "rewards/real": 20.68350601196289, "step": 1210 }, { "epoch": 4.44, "learning_rate": 2.8956228956228955e-08, "logits/generated": 3.1630351543426514, "logits/real": 2.918198823928833, "logps/generated": -736.3922119140625, "logps/real": -1501.014892578125, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -40.61255645751953, "rewards/margins": 59.3216438293457, "rewards/real": 18.70907974243164, "step": 1220 }, { "epoch": 4.47, "learning_rate": 2.8282828282828282e-08, "logits/generated": 3.0927798748016357, "logits/real": 2.896526575088501, "logps/generated": -728.1473388671875, "logps/real": -1276.7889404296875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -51.20064163208008, "rewards/margins": 66.27869415283203, "rewards/real": 15.07805061340332, "step": 1230 }, { "epoch": 4.51, "learning_rate": 2.760942760942761e-08, "logits/generated": 3.2132561206817627, "logits/real": 2.8452341556549072, "logps/generated": -720.19775390625, "logps/real": -1552.061279296875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -44.881900787353516, "rewards/margins": 64.55671691894531, "rewards/real": 19.674819946289062, "step": 1240 }, { "epoch": 4.55, "learning_rate": 2.6936026936026936e-08, "logits/generated": 3.0134525299072266, "logits/real": 2.899104595184326, "logps/generated": -726.0602416992188, "logps/real": -1371.2618408203125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -44.18798065185547, "rewards/margins": 62.98927688598633, "rewards/real": 18.801300048828125, "step": 1250 }, { "epoch": 4.58, "learning_rate": 2.6262626262626263e-08, "logits/generated": 3.175976276397705, "logits/real": 2.763339042663574, "logps/generated": -747.1805419921875, "logps/real": -1569.853271484375, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -44.73353958129883, "rewards/margins": 62.41156005859375, "rewards/real": 17.678028106689453, "step": 1260 }, { "epoch": 4.62, "learning_rate": 2.558922558922559e-08, "logits/generated": 3.063750743865967, "logits/real": 2.814635753631592, "logps/generated": -764.0443115234375, "logps/real": -1493.582275390625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -39.699615478515625, "rewards/margins": 59.87761306762695, "rewards/real": 20.177993774414062, "step": 1270 }, { "epoch": 4.65, "learning_rate": 2.4915824915824916e-08, "logits/generated": 3.027374744415283, "logits/real": 2.812553644180298, "logps/generated": -706.6592407226562, "logps/real": -1426.7469482421875, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -41.57141876220703, "rewards/margins": 59.673583984375, "rewards/real": 18.102169036865234, "step": 1280 }, { "epoch": 4.69, "learning_rate": 2.4242424242424243e-08, "logits/generated": 2.928546190261841, "logits/real": 2.7974414825439453, "logps/generated": -710.8232421875, "logps/real": -1253.787353515625, "loss": 0.038, "rewards/accuracies": 0.987500011920929, "rewards/generated": -39.23189163208008, "rewards/margins": 57.215492248535156, "rewards/real": 17.983592987060547, "step": 1290 }, { "epoch": 4.73, "learning_rate": 2.356902356902357e-08, "logits/generated": 3.1371006965637207, "logits/real": 2.8329155445098877, "logps/generated": -709.4130859375, "logps/real": -1598.436767578125, "loss": 0.0019, "rewards/accuracies": 1.0, "rewards/generated": -45.08416748046875, "rewards/margins": 66.89482116699219, "rewards/real": 21.810646057128906, "step": 1300 }, { "epoch": 4.76, "learning_rate": 2.2895622895622897e-08, "logits/generated": 3.0580315589904785, "logits/real": 2.7360568046569824, "logps/generated": -738.2771606445312, "logps/real": -1422.2760009765625, "loss": 0.0005, "rewards/accuracies": 1.0, "rewards/generated": -43.13732147216797, "rewards/margins": 63.8698616027832, "rewards/real": 20.732540130615234, "step": 1310 }, { "epoch": 4.8, "learning_rate": 2.222222222222222e-08, "logits/generated": 3.1114742755889893, "logits/real": 2.8517985343933105, "logps/generated": -738.4879150390625, "logps/real": -1297.4793701171875, "loss": 0.0026, "rewards/accuracies": 1.0, "rewards/generated": -43.627891540527344, "rewards/margins": 60.771217346191406, "rewards/real": 17.143320083618164, "step": 1320 }, { "epoch": 4.84, "learning_rate": 2.1548821548821547e-08, "logits/generated": 3.036583423614502, "logits/real": 2.91205096244812, "logps/generated": -714.7296142578125, "logps/real": -1485.159423828125, "loss": 0.0022, "rewards/accuracies": 1.0, "rewards/generated": -41.47072219848633, "rewards/margins": 63.18402099609375, "rewards/real": 21.713294982910156, "step": 1330 }, { "epoch": 4.87, "learning_rate": 2.0875420875420874e-08, "logits/generated": 3.1096909046173096, "logits/real": 2.841613531112671, "logps/generated": -708.9425048828125, "logps/real": -1414.4041748046875, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -51.68889236450195, "rewards/margins": 72.71478271484375, "rewards/real": 21.025896072387695, "step": 1340 }, { "epoch": 4.91, "learning_rate": 2.02020202020202e-08, "logits/generated": 3.1264853477478027, "logits/real": 2.844923734664917, "logps/generated": -737.677978515625, "logps/real": -1605.68115234375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -43.05826187133789, "rewards/margins": 64.03519439697266, "rewards/real": 20.976930618286133, "step": 1350 }, { "epoch": 4.95, "learning_rate": 1.9528619528619528e-08, "logits/generated": 3.0127763748168945, "logits/real": 2.7896978855133057, "logps/generated": -693.1800537109375, "logps/real": -1496.40283203125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -41.741485595703125, "rewards/margins": 60.62617874145508, "rewards/real": 18.884695053100586, "step": 1360 }, { "epoch": 4.98, "learning_rate": 1.8855218855218855e-08, "logits/generated": 3.0590386390686035, "logits/real": 2.885988235473633, "logps/generated": -692.5420532226562, "logps/real": -1366.4156494140625, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -37.74138259887695, "rewards/margins": 56.46770477294922, "rewards/real": 18.7263240814209, "step": 1370 }, { "epoch": 5.02, "learning_rate": 1.818181818181818e-08, "logits/generated": 3.0501322746276855, "logits/real": 2.886653423309326, "logps/generated": -701.55419921875, "logps/real": -1274.3582763671875, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -44.69414138793945, "rewards/margins": 61.38763427734375, "rewards/real": 16.693500518798828, "step": 1380 }, { "epoch": 5.05, "learning_rate": 1.750841750841751e-08, "logits/generated": 3.149022340774536, "logits/real": 2.875645160675049, "logps/generated": -714.64501953125, "logps/real": -1578.6724853515625, "loss": 0.002, "rewards/accuracies": 1.0, "rewards/generated": -41.473243713378906, "rewards/margins": 62.6212272644043, "rewards/real": 21.147979736328125, "step": 1390 }, { "epoch": 5.09, "learning_rate": 1.6835016835016835e-08, "logits/generated": 3.0550694465637207, "logits/real": 2.867199182510376, "logps/generated": -726.8590087890625, "logps/real": -1454.0662841796875, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -45.141422271728516, "rewards/margins": 62.887451171875, "rewards/real": 17.746028900146484, "step": 1400 }, { "epoch": 5.13, "learning_rate": 1.6161616161616162e-08, "logits/generated": 3.064716100692749, "logits/real": 2.839592933654785, "logps/generated": -755.7197265625, "logps/real": -1499.4759521484375, "loss": 0.0013, "rewards/accuracies": 0.987500011920929, "rewards/generated": -45.96525955200195, "rewards/margins": 65.89015197753906, "rewards/real": 19.92488670349121, "step": 1410 }, { "epoch": 5.16, "learning_rate": 1.5488215488215486e-08, "logits/generated": 3.0473430156707764, "logits/real": 2.976881504058838, "logps/generated": -672.17822265625, "logps/real": -1597.4091796875, "loss": 0.0012, "rewards/accuracies": 1.0, "rewards/generated": -38.601783752441406, "rewards/margins": 60.1019287109375, "rewards/real": 21.50014305114746, "step": 1420 }, { "epoch": 5.2, "learning_rate": 1.4814814814814813e-08, "logits/generated": 3.0654213428497314, "logits/real": 2.946502208709717, "logps/generated": -689.8428344726562, "logps/real": -1442.19140625, "loss": 0.0027, "rewards/accuracies": 1.0, "rewards/generated": -39.60371017456055, "rewards/margins": 60.931739807128906, "rewards/real": 21.32802963256836, "step": 1430 }, { "epoch": 5.24, "learning_rate": 1.4141414141414141e-08, "logits/generated": 3.1885037422180176, "logits/real": 2.9668922424316406, "logps/generated": -709.119873046875, "logps/real": -1579.7081298828125, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -42.88719940185547, "rewards/margins": 62.00786209106445, "rewards/real": 19.120656967163086, "step": 1440 }, { "epoch": 5.27, "learning_rate": 1.3468013468013468e-08, "logits/generated": 3.22326397895813, "logits/real": 2.988579273223877, "logps/generated": -752.568603515625, "logps/real": -1602.955810546875, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -41.44694519042969, "rewards/margins": 64.63514709472656, "rewards/real": 23.188203811645508, "step": 1450 }, { "epoch": 5.31, "learning_rate": 1.2794612794612795e-08, "logits/generated": 3.106879234313965, "logits/real": 2.9339661598205566, "logps/generated": -731.5869750976562, "logps/real": -1420.4613037109375, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -48.846099853515625, "rewards/margins": 66.61249542236328, "rewards/real": 17.766395568847656, "step": 1460 }, { "epoch": 5.35, "learning_rate": 1.2121212121212122e-08, "logits/generated": 3.0550105571746826, "logits/real": 2.9313416481018066, "logps/generated": -729.6038818359375, "logps/real": -1670.770751953125, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -42.07399368286133, "rewards/margins": 65.95115661621094, "rewards/real": 23.87717056274414, "step": 1470 }, { "epoch": 5.38, "learning_rate": 1.1447811447811448e-08, "logits/generated": 3.079362154006958, "logits/real": 2.8558249473571777, "logps/generated": -687.9454956054688, "logps/real": -1376.7467041015625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -41.538719177246094, "rewards/margins": 60.89226150512695, "rewards/real": 19.35353660583496, "step": 1480 }, { "epoch": 5.42, "learning_rate": 1.0774410774410774e-08, "logits/generated": 3.048426628112793, "logits/real": 2.709348201751709, "logps/generated": -701.3831787109375, "logps/real": -1443.1263427734375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -40.00122833251953, "rewards/margins": 60.95526885986328, "rewards/real": 20.954030990600586, "step": 1490 }, { "epoch": 5.45, "learning_rate": 1.01010101010101e-08, "logits/generated": 3.009213447570801, "logits/real": 3.025883197784424, "logps/generated": -737.5149536132812, "logps/real": -1425.9295654296875, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -40.59176254272461, "rewards/margins": 59.60404586791992, "rewards/real": 19.012287139892578, "step": 1500 }, { "epoch": 5.49, "learning_rate": 9.427609427609427e-09, "logits/generated": 3.0659737586975098, "logits/real": 2.893873453140259, "logps/generated": -747.0940551757812, "logps/real": -1362.1685791015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -46.11384201049805, "rewards/margins": 65.68705749511719, "rewards/real": 19.57320785522461, "step": 1510 }, { "epoch": 5.53, "learning_rate": 8.754208754208754e-09, "logits/generated": 3.0093538761138916, "logits/real": 2.7467427253723145, "logps/generated": -693.4508056640625, "logps/real": -1546.880615234375, "loss": 0.0009, "rewards/accuracies": 1.0, "rewards/generated": -42.20683670043945, "rewards/margins": 62.614654541015625, "rewards/real": 20.407817840576172, "step": 1520 }, { "epoch": 5.56, "learning_rate": 8.080808080808081e-09, "logits/generated": 3.2107110023498535, "logits/real": 2.8605856895446777, "logps/generated": -701.0584106445312, "logps/real": -1577.1934814453125, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -41.23312759399414, "rewards/margins": 62.98259353637695, "rewards/real": 21.749473571777344, "step": 1530 }, { "epoch": 5.6, "learning_rate": 7.407407407407406e-09, "logits/generated": 3.084939479827881, "logits/real": 2.890946865081787, "logps/generated": -731.8319091796875, "logps/real": -1644.828369140625, "loss": 0.0033, "rewards/accuracies": 1.0, "rewards/generated": -48.95234298706055, "rewards/margins": 70.60417175292969, "rewards/real": 21.65183448791504, "step": 1540 }, { "epoch": 5.64, "learning_rate": 6.734006734006734e-09, "logits/generated": 3.0578205585479736, "logits/real": 2.9019885063171387, "logps/generated": -705.0582275390625, "logps/real": -1583.41943359375, "loss": 0.0004, "rewards/accuracies": 1.0, "rewards/generated": -39.79522705078125, "rewards/margins": 61.96832275390625, "rewards/real": 22.173097610473633, "step": 1550 }, { "epoch": 5.67, "learning_rate": 6.060606060606061e-09, "logits/generated": 3.148460865020752, "logits/real": 2.82420015335083, "logps/generated": -704.2009887695312, "logps/real": -1561.6871337890625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -47.81111526489258, "rewards/margins": 68.67701721191406, "rewards/real": 20.86590576171875, "step": 1560 }, { "epoch": 5.71, "learning_rate": 5.387205387205387e-09, "logits/generated": 3.0725507736206055, "logits/real": 2.880845546722412, "logps/generated": -715.9830322265625, "logps/real": -1333.7222900390625, "loss": 0.0001, "rewards/accuracies": 1.0, "rewards/generated": -47.347434997558594, "rewards/margins": 65.27032470703125, "rewards/real": 17.922895431518555, "step": 1570 }, { "epoch": 5.75, "learning_rate": 4.713804713804714e-09, "logits/generated": 3.1293959617614746, "logits/real": 2.8387153148651123, "logps/generated": -726.591552734375, "logps/real": -1756.815673828125, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -47.847747802734375, "rewards/margins": 75.11756896972656, "rewards/real": 27.269817352294922, "step": 1580 }, { "epoch": 5.78, "learning_rate": 4.0404040404040405e-09, "logits/generated": 3.1061339378356934, "logits/real": 2.793865203857422, "logps/generated": -680.6685791015625, "logps/real": -1437.5989990234375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -40.11597442626953, "rewards/margins": 59.0734748840332, "rewards/real": 18.957500457763672, "step": 1590 }, { "epoch": 5.82, "learning_rate": 3.367003367003367e-09, "logits/generated": 2.965463161468506, "logits/real": 2.860352039337158, "logps/generated": -710.3120727539062, "logps/real": -1333.382568359375, "loss": 0.0003, "rewards/accuracies": 1.0, "rewards/generated": -47.621055603027344, "rewards/margins": 64.4249038696289, "rewards/real": 16.803848266601562, "step": 1600 }, { "epoch": 5.85, "learning_rate": 2.6936026936026934e-09, "logits/generated": 3.0897064208984375, "logits/real": 2.8475499153137207, "logps/generated": -716.2346801757812, "logps/real": -1237.0738525390625, "loss": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -47.742759704589844, "rewards/margins": 64.90467071533203, "rewards/real": 17.161916732788086, "step": 1610 }, { "epoch": 5.89, "learning_rate": 2.0202020202020203e-09, "logits/generated": 3.0108256340026855, "logits/real": 2.778660774230957, "logps/generated": -688.628173828125, "logps/real": -1455.0592041015625, "loss": 0.0002, "rewards/accuracies": 1.0, "rewards/generated": -38.33720397949219, "rewards/margins": 57.62987518310547, "rewards/real": 19.29267120361328, "step": 1620 }, { "epoch": 5.93, "learning_rate": 1.3468013468013467e-09, "logits/generated": 3.0343985557556152, "logits/real": 2.7732152938842773, "logps/generated": -739.4644775390625, "logps/real": -1594.602783203125, "loss": 0.0008, "rewards/accuracies": 1.0, "rewards/generated": -44.978233337402344, "rewards/margins": 67.10484313964844, "rewards/real": 22.126609802246094, "step": 1630 }, { "epoch": 5.96, "learning_rate": 6.734006734006734e-10, "logits/generated": 3.1493444442749023, "logits/real": 2.8230597972869873, "logps/generated": -735.3685302734375, "logps/real": -1376.6683349609375, "loss": 0.0017, "rewards/accuracies": 1.0, "rewards/generated": -46.86839294433594, "rewards/margins": 64.47018432617188, "rewards/real": 17.601795196533203, "step": 1640 }, { "epoch": 6.0, "learning_rate": 0.0, "logits/generated": 3.1379590034484863, "logits/real": 2.829414129257202, "logps/generated": -707.5266723632812, "logps/real": -1518.556640625, "loss": 0.0024, "rewards/accuracies": 1.0, "rewards/generated": -49.48387145996094, "rewards/margins": 69.3672866821289, "rewards/real": 19.88340950012207, "step": 1650 }, { "epoch": 6.0, "step": 1650, "total_flos": 0.0, "train_loss": 0.10275351283818689, "train_runtime": 25691.8113, "train_samples_per_second": 4.11, "train_steps_per_second": 0.064 } ], "logging_steps": 10, "max_steps": 1650, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }