{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.988571428571428, "eval_steps": 100, "global_step": 1572, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 6.329113924050632e-10, "logits/generated": 3.334580421447754, "logits/real": 2.520763397216797, "logps/generated": -653.1103515625, "logps/real": -1322.83154296875, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.04, "learning_rate": 6.329113924050633e-09, "logits/generated": 2.984175682067871, "logits/real": 2.4933767318725586, "logps/generated": -569.99365234375, "logps/real": -1401.09375, "loss": 0.6926, "rewards/accuracies": 0.4027777910232544, "rewards/generated": 0.001990392105653882, "rewards/margins": 0.003700366010889411, "rewards/real": 0.005690759979188442, "step": 10 }, { "epoch": 0.08, "learning_rate": 1.2658227848101265e-08, "logits/generated": 3.1899852752685547, "logits/real": 2.5960335731506348, "logps/generated": -639.3368530273438, "logps/real": -1633.5921630859375, "loss": 0.6864, "rewards/accuracies": 0.5375000238418579, "rewards/generated": -0.03260010853409767, "rewards/margins": 0.02890392579138279, "rewards/real": -0.0036961850710213184, "step": 20 }, { "epoch": 0.11, "learning_rate": 1.89873417721519e-08, "logits/generated": 3.1001484394073486, "logits/real": 2.5820600986480713, "logps/generated": -625.9615478515625, "logps/real": -1550.431884765625, "loss": 0.6632, "rewards/accuracies": 0.6875, "rewards/generated": -0.0366821363568306, "rewards/margins": 0.0638287365436554, "rewards/real": 0.0271465964615345, "step": 30 }, { "epoch": 0.15, "learning_rate": 2.531645569620253e-08, "logits/generated": 2.8814804553985596, "logits/real": 2.6468660831451416, "logps/generated": -592.5699462890625, "logps/real": -1527.6875, "loss": 0.6267, "rewards/accuracies": 0.762499988079071, "rewards/generated": -0.08724664151668549, "rewards/margins": 0.15261340141296387, "rewards/real": 0.06536674499511719, "step": 40 }, { "epoch": 0.19, "learning_rate": 3.1645569620253166e-08, "logits/generated": 3.147773265838623, "logits/real": 2.535146713256836, "logps/generated": -688.0377807617188, "logps/real": -1441.314453125, "loss": 0.5423, "rewards/accuracies": 0.887499988079071, "rewards/generated": -0.17641791701316833, "rewards/margins": 0.3502033054828644, "rewards/real": 0.17378540337085724, "step": 50 }, { "epoch": 0.23, "learning_rate": 3.79746835443038e-08, "logits/generated": 3.1963257789611816, "logits/real": 2.5462851524353027, "logps/generated": -594.2725219726562, "logps/real": -1453.6497802734375, "loss": 0.5062, "rewards/accuracies": 0.8999999761581421, "rewards/generated": -0.27025485038757324, "rewards/margins": 0.4667099416255951, "rewards/real": 0.19645504653453827, "step": 60 }, { "epoch": 0.27, "learning_rate": 4.430379746835442e-08, "logits/generated": 2.942413806915283, "logits/real": 2.5651955604553223, "logps/generated": -623.3458251953125, "logps/real": -1351.8724365234375, "loss": 0.4366, "rewards/accuracies": 0.949999988079071, "rewards/generated": -0.48640790581703186, "rewards/margins": 0.7007172703742981, "rewards/real": 0.21430937945842743, "step": 70 }, { "epoch": 0.3, "learning_rate": 5.063291139240506e-08, "logits/generated": 3.1414570808410645, "logits/real": 2.5161662101745605, "logps/generated": -623.8413696289062, "logps/real": -1677.4615478515625, "loss": 0.3137, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -0.7329307794570923, "rewards/margins": 1.239257574081421, "rewards/real": 0.5063267350196838, "step": 80 }, { "epoch": 0.34, "learning_rate": 5.69620253164557e-08, "logits/generated": 3.070830821990967, "logits/real": 2.5541062355041504, "logps/generated": -657.6233520507812, "logps/real": -1626.2838134765625, "loss": 0.2947, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.0010322332382202, "rewards/margins": 1.5189402103424072, "rewards/real": 0.517907977104187, "step": 90 }, { "epoch": 0.38, "learning_rate": 6.329113924050633e-08, "logits/generated": 2.870387315750122, "logits/real": 2.7046616077423096, "logps/generated": -579.2174072265625, "logps/real": -1487.188232421875, "loss": 0.2589, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.0812404155731201, "rewards/margins": 1.698075532913208, "rewards/real": 0.6168351173400879, "step": 100 }, { "epoch": 0.42, "learning_rate": 6.962025316455696e-08, "logits/generated": 2.9290151596069336, "logits/real": 2.728487014770508, "logps/generated": -648.2110595703125, "logps/real": -1627.390869140625, "loss": 0.2453, "rewards/accuracies": 1.0, "rewards/generated": -1.244185209274292, "rewards/margins": 1.9264612197875977, "rewards/real": 0.6822759509086609, "step": 110 }, { "epoch": 0.46, "learning_rate": 7.59493670886076e-08, "logits/generated": 2.9588751792907715, "logits/real": 2.5211470127105713, "logps/generated": -652.1360473632812, "logps/real": -1250.042236328125, "loss": 0.2277, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -1.391522765159607, "rewards/margins": 1.8222811222076416, "rewards/real": 0.43075838685035706, "step": 120 }, { "epoch": 0.5, "learning_rate": 8.227848101265823e-08, "logits/generated": 3.0586698055267334, "logits/real": 2.4790332317352295, "logps/generated": -611.0165405273438, "logps/real": -1484.681884765625, "loss": 0.188, "rewards/accuracies": 0.9624999761581421, "rewards/generated": -1.7131977081298828, "rewards/margins": 2.333926200866699, "rewards/real": 0.6207287907600403, "step": 130 }, { "epoch": 0.53, "learning_rate": 8.860759493670885e-08, "logits/generated": 2.900022506713867, "logits/real": 2.670552968978882, "logps/generated": -655.8267822265625, "logps/real": -1645.905029296875, "loss": 0.1467, "rewards/accuracies": 1.0, "rewards/generated": -2.051678419113159, "rewards/margins": 2.9112563133239746, "rewards/real": 0.8595778346061707, "step": 140 }, { "epoch": 0.57, "learning_rate": 9.493670886075948e-08, "logits/generated": 3.083251476287842, "logits/real": 2.547938108444214, "logps/generated": -647.1588134765625, "logps/real": -1623.0638427734375, "loss": 0.122, "rewards/accuracies": 1.0, "rewards/generated": -2.5622642040252686, "rewards/margins": 3.50146746635437, "rewards/real": 0.9392032623291016, "step": 150 }, { "epoch": 0.61, "learning_rate": 9.985855728429985e-08, "logits/generated": 3.189711809158325, "logits/real": 2.609210729598999, "logps/generated": -673.2716064453125, "logps/real": -1431.1510009765625, "loss": 0.1103, "rewards/accuracies": 1.0, "rewards/generated": -2.6634578704833984, "rewards/margins": 3.5925452709198, "rewards/real": 0.9290875196456909, "step": 160 }, { "epoch": 0.65, "learning_rate": 9.915134370579915e-08, "logits/generated": 2.796431064605713, "logits/real": 2.6858787536621094, "logps/generated": -623.2662353515625, "logps/real": -1411.96533203125, "loss": 0.1011, "rewards/accuracies": 0.987500011920929, "rewards/generated": -2.4235339164733887, "rewards/margins": 3.487780809402466, "rewards/real": 1.0642468929290771, "step": 170 }, { "epoch": 0.69, "learning_rate": 9.844413012729844e-08, "logits/generated": 2.992323875427246, "logits/real": 2.6532320976257324, "logps/generated": -626.7801513671875, "logps/real": -1628.442626953125, "loss": 0.1009, "rewards/accuracies": 1.0, "rewards/generated": -2.747467517852783, "rewards/margins": 4.025119781494141, "rewards/real": 1.2776525020599365, "step": 180 }, { "epoch": 0.72, "learning_rate": 9.773691654879774e-08, "logits/generated": 3.0291736125946045, "logits/real": 2.5113823413848877, "logps/generated": -576.511474609375, "logps/real": -1578.1036376953125, "loss": 0.0983, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -2.7749691009521484, "rewards/margins": 3.916727066040039, "rewards/real": 1.1417579650878906, "step": 190 }, { "epoch": 0.76, "learning_rate": 9.702970297029703e-08, "logits/generated": 2.8259243965148926, "logits/real": 2.727518081665039, "logps/generated": -609.0852661132812, "logps/real": -1438.0333251953125, "loss": 0.0846, "rewards/accuracies": 1.0, "rewards/generated": -2.6536800861358643, "rewards/margins": 3.9251868724823, "rewards/real": 1.2715070247650146, "step": 200 }, { "epoch": 0.8, "learning_rate": 9.632248939179631e-08, "logits/generated": 3.0957698822021484, "logits/real": 2.5199692249298096, "logps/generated": -674.4405517578125, "logps/real": -1574.9976806640625, "loss": 0.0784, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.413438320159912, "rewards/margins": 4.9110894203186035, "rewards/real": 1.497650384902954, "step": 210 }, { "epoch": 0.84, "learning_rate": 9.561527581329562e-08, "logits/generated": 3.059602737426758, "logits/real": 2.611131429672241, "logps/generated": -657.6616821289062, "logps/real": -1342.2515869140625, "loss": 0.0751, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.3509624004364014, "rewards/margins": 4.484201908111572, "rewards/real": 1.1332390308380127, "step": 220 }, { "epoch": 0.88, "learning_rate": 9.49080622347949e-08, "logits/generated": 2.9535973072052, "logits/real": 2.695953130722046, "logps/generated": -623.5858764648438, "logps/real": -1357.4754638671875, "loss": 0.0752, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.0820462703704834, "rewards/margins": 4.444340705871582, "rewards/real": 1.3622944355010986, "step": 230 }, { "epoch": 0.91, "learning_rate": 9.420084865629419e-08, "logits/generated": 3.0728561878204346, "logits/real": 2.6514034271240234, "logps/generated": -646.6805419921875, "logps/real": -1661.2340087890625, "loss": 0.074, "rewards/accuracies": 0.987500011920929, "rewards/generated": -3.4602718353271484, "rewards/margins": 5.120414733886719, "rewards/real": 1.6601425409317017, "step": 240 }, { "epoch": 0.95, "learning_rate": 9.349363507779348e-08, "logits/generated": 3.14707088470459, "logits/real": 2.582815647125244, "logps/generated": -659.8969116210938, "logps/real": -1602.3460693359375, "loss": 0.0605, "rewards/accuracies": 1.0, "rewards/generated": -3.8163421154022217, "rewards/margins": 5.412659645080566, "rewards/real": 1.5963174104690552, "step": 250 }, { "epoch": 0.99, "learning_rate": 9.278642149929278e-08, "logits/generated": 2.910804271697998, "logits/real": 2.588214159011841, "logps/generated": -618.7315673828125, "logps/real": -1585.5712890625, "loss": 0.0591, "rewards/accuracies": 1.0, "rewards/generated": -3.172882556915283, "rewards/margins": 4.7417426109313965, "rewards/real": 1.5688601732254028, "step": 260 }, { "epoch": 1.03, "learning_rate": 9.207920792079208e-08, "logits/generated": 3.0260536670684814, "logits/real": 2.576744556427002, "logps/generated": -712.7923583984375, "logps/real": -1611.0130615234375, "loss": 0.0658, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.007518768310547, "rewards/margins": 5.749050617218018, "rewards/real": 1.7415320873260498, "step": 270 }, { "epoch": 1.07, "learning_rate": 9.137199434229137e-08, "logits/generated": 3.0747456550598145, "logits/real": 2.6565499305725098, "logps/generated": -677.0684814453125, "logps/real": -1437.9249267578125, "loss": 0.0598, "rewards/accuracies": 1.0, "rewards/generated": -4.153753280639648, "rewards/margins": 5.587630271911621, "rewards/real": 1.4338771104812622, "step": 280 }, { "epoch": 1.1, "learning_rate": 9.066478076379066e-08, "logits/generated": 3.047402858734131, "logits/real": 2.692274332046509, "logps/generated": -652.3853759765625, "logps/real": -1553.9385986328125, "loss": 0.0612, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.262011528015137, "rewards/margins": 5.818949222564697, "rewards/real": 1.556937575340271, "step": 290 }, { "epoch": 1.14, "learning_rate": 8.995756718528995e-08, "logits/generated": 2.943486452102661, "logits/real": 2.4795732498168945, "logps/generated": -604.9609375, "logps/real": -1512.994140625, "loss": 0.0622, "rewards/accuracies": 1.0, "rewards/generated": -3.6027259826660156, "rewards/margins": 5.401385307312012, "rewards/real": 1.7986586093902588, "step": 300 }, { "epoch": 1.18, "learning_rate": 8.925035360678924e-08, "logits/generated": 2.987004518508911, "logits/real": 2.6335082054138184, "logps/generated": -626.880126953125, "logps/real": -1727.589599609375, "loss": 0.0537, "rewards/accuracies": 1.0, "rewards/generated": -3.473705768585205, "rewards/margins": 5.608192443847656, "rewards/real": 2.1344869136810303, "step": 310 }, { "epoch": 1.22, "learning_rate": 8.854314002828854e-08, "logits/generated": 3.1043455600738525, "logits/real": 2.5045957565307617, "logps/generated": -654.8699951171875, "logps/real": -1668.8502197265625, "loss": 0.056, "rewards/accuracies": 1.0, "rewards/generated": -3.9041481018066406, "rewards/margins": 5.877499580383301, "rewards/real": 1.9733517169952393, "step": 320 }, { "epoch": 1.26, "learning_rate": 8.783592644978784e-08, "logits/generated": 2.9959933757781982, "logits/real": 2.833263874053955, "logps/generated": -700.354248046875, "logps/real": -1501.94775390625, "loss": 0.0495, "rewards/accuracies": 1.0, "rewards/generated": -4.707437515258789, "rewards/margins": 6.398648262023926, "rewards/real": 1.691210389137268, "step": 330 }, { "epoch": 1.3, "learning_rate": 8.712871287128713e-08, "logits/generated": 3.1212868690490723, "logits/real": 2.434352159500122, "logps/generated": -631.5654296875, "logps/real": -1499.0816650390625, "loss": 0.056, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -3.8523108959198, "rewards/margins": 5.5262956619262695, "rewards/real": 1.6739847660064697, "step": 340 }, { "epoch": 1.33, "learning_rate": 8.642149929278641e-08, "logits/generated": 3.180377244949341, "logits/real": 2.5462543964385986, "logps/generated": -658.816162109375, "logps/real": -1713.6314697265625, "loss": 0.0449, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.1651411056518555, "rewards/margins": 6.40631628036499, "rewards/real": 2.241175651550293, "step": 350 }, { "epoch": 1.37, "learning_rate": 8.57142857142857e-08, "logits/generated": 3.1063506603240967, "logits/real": 2.604684352874756, "logps/generated": -630.5732421875, "logps/real": -1476.998779296875, "loss": 0.0508, "rewards/accuracies": 0.9750000238418579, "rewards/generated": -4.010001182556152, "rewards/margins": 5.754232406616211, "rewards/real": 1.74423086643219, "step": 360 }, { "epoch": 1.41, "learning_rate": 8.5007072135785e-08, "logits/generated": 2.880650758743286, "logits/real": 2.8358330726623535, "logps/generated": -612.5302124023438, "logps/real": -1495.0152587890625, "loss": 0.0521, "rewards/accuracies": 0.987500011920929, "rewards/generated": -4.035798072814941, "rewards/margins": 6.071022987365723, "rewards/real": 2.035224676132202, "step": 370 }, { "epoch": 1.45, "learning_rate": 8.429985855728429e-08, "logits/generated": 2.8637542724609375, "logits/real": 2.558077335357666, "logps/generated": -667.6907958984375, "logps/real": -1590.078857421875, "loss": 0.0392, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.026558876037598, "rewards/margins": 7.185013771057129, "rewards/real": 2.158454656600952, "step": 380 }, { "epoch": 1.49, "learning_rate": 8.35926449787836e-08, "logits/generated": 3.141674518585205, "logits/real": 2.5596821308135986, "logps/generated": -645.9779663085938, "logps/real": -1424.9947509765625, "loss": 0.0396, "rewards/accuracies": 1.0, "rewards/generated": -4.634699821472168, "rewards/margins": 6.322070121765137, "rewards/real": 1.6873706579208374, "step": 390 }, { "epoch": 1.52, "learning_rate": 8.288543140028288e-08, "logits/generated": 3.265359878540039, "logits/real": 2.5316691398620605, "logps/generated": -671.8311767578125, "logps/real": -1545.91796875, "loss": 0.0326, "rewards/accuracies": 1.0, "rewards/generated": -5.4212493896484375, "rewards/margins": 7.516656398773193, "rewards/real": 2.0954079627990723, "step": 400 }, { "epoch": 1.56, "learning_rate": 8.217821782178217e-08, "logits/generated": 3.1239216327667236, "logits/real": 2.669434070587158, "logps/generated": -656.9334716796875, "logps/real": -1509.529541015625, "loss": 0.0422, "rewards/accuracies": 1.0, "rewards/generated": -4.773463249206543, "rewards/margins": 6.696150779724121, "rewards/real": 1.9226871728897095, "step": 410 }, { "epoch": 1.6, "learning_rate": 8.147100424328147e-08, "logits/generated": 3.169534683227539, "logits/real": 2.637077808380127, "logps/generated": -701.36767578125, "logps/real": -1685.140380859375, "loss": 0.0393, "rewards/accuracies": 1.0, "rewards/generated": -5.24563455581665, "rewards/margins": 7.496856689453125, "rewards/real": 2.2512223720550537, "step": 420 }, { "epoch": 1.64, "learning_rate": 8.076379066478076e-08, "logits/generated": 2.9224164485931396, "logits/real": 2.562767505645752, "logps/generated": -679.8922729492188, "logps/real": -1469.2054443359375, "loss": 0.0388, "rewards/accuracies": 1.0, "rewards/generated": -4.989293098449707, "rewards/margins": 7.009693145751953, "rewards/real": 2.020399570465088, "step": 430 }, { "epoch": 1.68, "learning_rate": 8.005657708628005e-08, "logits/generated": 2.9800190925598145, "logits/real": 2.654110908508301, "logps/generated": -697.5772705078125, "logps/real": -1385.52099609375, "loss": 0.0376, "rewards/accuracies": 1.0, "rewards/generated": -5.243396759033203, "rewards/margins": 7.55600643157959, "rewards/real": 2.312610149383545, "step": 440 }, { "epoch": 1.71, "learning_rate": 7.934936350777935e-08, "logits/generated": 3.078356981277466, "logits/real": 2.535681962966919, "logps/generated": -660.41943359375, "logps/real": -1552.767822265625, "loss": 0.0262, "rewards/accuracies": 1.0, "rewards/generated": -5.234507083892822, "rewards/margins": 7.48751974105835, "rewards/real": 2.2530131340026855, "step": 450 }, { "epoch": 1.75, "learning_rate": 7.864214992927864e-08, "logits/generated": 2.9969305992126465, "logits/real": 2.7245428562164307, "logps/generated": -742.2290649414062, "logps/real": -1633.814453125, "loss": 0.0317, "rewards/accuracies": 1.0, "rewards/generated": -5.18667459487915, "rewards/margins": 7.811495780944824, "rewards/real": 2.624821186065674, "step": 460 }, { "epoch": 1.79, "learning_rate": 7.793493635077794e-08, "logits/generated": 3.072664260864258, "logits/real": 2.67760968208313, "logps/generated": -669.0721435546875, "logps/real": -1515.5504150390625, "loss": 0.0351, "rewards/accuracies": 1.0, "rewards/generated": -4.986624717712402, "rewards/margins": 7.337203025817871, "rewards/real": 2.350578546524048, "step": 470 }, { "epoch": 1.83, "learning_rate": 7.722772277227723e-08, "logits/generated": 3.0342142581939697, "logits/real": 2.5741496086120605, "logps/generated": -678.1422729492188, "logps/real": -1544.0634765625, "loss": 0.0307, "rewards/accuracies": 1.0, "rewards/generated": -5.9622602462768555, "rewards/margins": 8.503713607788086, "rewards/real": 2.5414538383483887, "step": 480 }, { "epoch": 1.87, "learning_rate": 7.652050919377651e-08, "logits/generated": 3.1236672401428223, "logits/real": 2.6698803901672363, "logps/generated": -673.8851928710938, "logps/real": -1506.4398193359375, "loss": 0.0251, "rewards/accuracies": 1.0, "rewards/generated": -5.428993225097656, "rewards/margins": 8.060708999633789, "rewards/real": 2.631716251373291, "step": 490 }, { "epoch": 1.9, "learning_rate": 7.58132956152758e-08, "logits/generated": 3.169041633605957, "logits/real": 2.631805658340454, "logps/generated": -637.0628662109375, "logps/real": -1626.2333984375, "loss": 0.0338, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.156910419464111, "rewards/margins": 7.718507289886475, "rewards/real": 2.5615973472595215, "step": 500 }, { "epoch": 1.94, "learning_rate": 7.51060820367751e-08, "logits/generated": 3.1486570835113525, "logits/real": 2.661397933959961, "logps/generated": -655.4933471679688, "logps/real": -1476.0849609375, "loss": 0.0256, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.335035800933838, "rewards/margins": 7.7971930503845215, "rewards/real": 2.462157726287842, "step": 510 }, { "epoch": 1.98, "learning_rate": 7.43988684582744e-08, "logits/generated": 2.928063154220581, "logits/real": 2.730922222137451, "logps/generated": -652.9177856445312, "logps/real": -1432.533935546875, "loss": 0.031, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.431285858154297, "rewards/margins": 8.037918090820312, "rewards/real": 2.606632709503174, "step": 520 }, { "epoch": 2.02, "learning_rate": 7.36916548797737e-08, "logits/generated": 3.1943507194519043, "logits/real": 2.7164087295532227, "logps/generated": -679.414794921875, "logps/real": -1487.551025390625, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/generated": -5.67262601852417, "rewards/margins": 8.073965072631836, "rewards/real": 2.4013381004333496, "step": 530 }, { "epoch": 2.06, "learning_rate": 7.298444130127298e-08, "logits/generated": 3.188331127166748, "logits/real": 2.6221508979797363, "logps/generated": -695.04931640625, "logps/real": -1611.9033203125, "loss": 0.0215, "rewards/accuracies": 1.0, "rewards/generated": -5.9647440910339355, "rewards/margins": 9.016992568969727, "rewards/real": 3.052248477935791, "step": 540 }, { "epoch": 2.1, "learning_rate": 7.227722772277227e-08, "logits/generated": 3.062971591949463, "logits/real": 2.652564525604248, "logps/generated": -722.2522583007812, "logps/real": -1378.610107421875, "loss": 0.022, "rewards/accuracies": 1.0, "rewards/generated": -6.261225700378418, "rewards/margins": 9.063823699951172, "rewards/real": 2.802598476409912, "step": 550 }, { "epoch": 2.13, "learning_rate": 7.157001414427156e-08, "logits/generated": 2.944784641265869, "logits/real": 2.626530408859253, "logps/generated": -630.6044311523438, "logps/real": -1555.120849609375, "loss": 0.0287, "rewards/accuracies": 1.0, "rewards/generated": -5.279501914978027, "rewards/margins": 8.238837242126465, "rewards/real": 2.9593350887298584, "step": 560 }, { "epoch": 2.17, "learning_rate": 7.086280056577086e-08, "logits/generated": 2.974566698074341, "logits/real": 2.734873056411743, "logps/generated": -740.10498046875, "logps/real": -1393.660400390625, "loss": 0.026, "rewards/accuracies": 1.0, "rewards/generated": -6.561972141265869, "rewards/margins": 9.23666000366211, "rewards/real": 2.6746881008148193, "step": 570 }, { "epoch": 2.21, "learning_rate": 7.015558698727016e-08, "logits/generated": 3.162912130355835, "logits/real": 2.7691640853881836, "logps/generated": -637.5083618164062, "logps/real": -1494.177490234375, "loss": 0.0244, "rewards/accuracies": 1.0, "rewards/generated": -5.555890083312988, "rewards/margins": 8.500304222106934, "rewards/real": 2.944413900375366, "step": 580 }, { "epoch": 2.25, "learning_rate": 6.944837340876945e-08, "logits/generated": 2.8717501163482666, "logits/real": 2.6633965969085693, "logps/generated": -659.5353393554688, "logps/real": -1353.6895751953125, "loss": 0.0217, "rewards/accuracies": 1.0, "rewards/generated": -6.05059814453125, "rewards/margins": 8.580648422241211, "rewards/real": 2.530050754547119, "step": 590 }, { "epoch": 2.29, "learning_rate": 6.874115983026874e-08, "logits/generated": 3.1894919872283936, "logits/real": 2.647244691848755, "logps/generated": -659.2657470703125, "logps/real": -1401.873779296875, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/generated": -5.985430717468262, "rewards/margins": 8.887848854064941, "rewards/real": 2.902418375015259, "step": 600 }, { "epoch": 2.32, "learning_rate": 6.803394625176802e-08, "logits/generated": 3.2617828845977783, "logits/real": 2.715851306915283, "logps/generated": -682.93603515625, "logps/real": -1624.47509765625, "loss": 0.0211, "rewards/accuracies": 1.0, "rewards/generated": -6.639365196228027, "rewards/margins": 10.307806015014648, "rewards/real": 3.6684412956237793, "step": 610 }, { "epoch": 2.36, "learning_rate": 6.732673267326733e-08, "logits/generated": 3.0423474311828613, "logits/real": 2.620255947113037, "logps/generated": -686.6878662109375, "logps/real": -1532.2369384765625, "loss": 0.0197, "rewards/accuracies": 1.0, "rewards/generated": -6.549188137054443, "rewards/margins": 10.09010124206543, "rewards/real": 3.5409133434295654, "step": 620 }, { "epoch": 2.4, "learning_rate": 6.661951909476661e-08, "logits/generated": 3.0638270378112793, "logits/real": 2.641237258911133, "logps/generated": -684.035888671875, "logps/real": -1405.203369140625, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/generated": -6.292230606079102, "rewards/margins": 9.292798042297363, "rewards/real": 3.0005664825439453, "step": 630 }, { "epoch": 2.44, "learning_rate": 6.591230551626592e-08, "logits/generated": 2.9729528427124023, "logits/real": 2.662184715270996, "logps/generated": -651.6361083984375, "logps/real": -1185.017578125, "loss": 0.0219, "rewards/accuracies": 1.0, "rewards/generated": -6.018342971801758, "rewards/margins": 8.436049461364746, "rewards/real": 2.41770601272583, "step": 640 }, { "epoch": 2.48, "learning_rate": 6.52050919377652e-08, "logits/generated": 2.9936587810516357, "logits/real": 2.7459309101104736, "logps/generated": -617.2003784179688, "logps/real": -1374.72607421875, "loss": 0.0199, "rewards/accuracies": 1.0, "rewards/generated": -6.145440578460693, "rewards/margins": 9.338098526000977, "rewards/real": 3.192657470703125, "step": 650 }, { "epoch": 2.51, "learning_rate": 6.449787835926449e-08, "logits/generated": 3.140695095062256, "logits/real": 2.72432541847229, "logps/generated": -712.6917724609375, "logps/real": -1558.529296875, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/generated": -7.1376166343688965, "rewards/margins": 10.716670036315918, "rewards/real": 3.5790531635284424, "step": 660 }, { "epoch": 2.55, "learning_rate": 6.379066478076379e-08, "logits/generated": 3.162978172302246, "logits/real": 2.6974151134490967, "logps/generated": -722.5013427734375, "logps/real": -1497.13232421875, "loss": 0.0184, "rewards/accuracies": 1.0, "rewards/generated": -7.2485809326171875, "rewards/margins": 10.17140007019043, "rewards/real": 2.922818660736084, "step": 670 }, { "epoch": 2.59, "learning_rate": 6.308345120226308e-08, "logits/generated": 3.1272811889648438, "logits/real": 2.774167776107788, "logps/generated": -650.5689697265625, "logps/real": -1484.775634765625, "loss": 0.0181, "rewards/accuracies": 1.0, "rewards/generated": -6.5811662673950195, "rewards/margins": 10.269671440124512, "rewards/real": 3.6885063648223877, "step": 680 }, { "epoch": 2.63, "learning_rate": 6.237623762376237e-08, "logits/generated": 3.171654462814331, "logits/real": 2.742036819458008, "logps/generated": -754.7232666015625, "logps/real": -1549.0023193359375, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/generated": -7.933679103851318, "rewards/margins": 11.404020309448242, "rewards/real": 3.4703421592712402, "step": 690 }, { "epoch": 2.67, "learning_rate": 6.166902404526166e-08, "logits/generated": 3.1139299869537354, "logits/real": 2.8051939010620117, "logps/generated": -735.65087890625, "logps/real": -1367.9779052734375, "loss": 0.0153, "rewards/accuracies": 1.0, "rewards/generated": -7.616179466247559, "rewards/margins": 11.006625175476074, "rewards/real": 3.3904449939727783, "step": 700 }, { "epoch": 2.7, "learning_rate": 6.096181046676096e-08, "logits/generated": 3.1485302448272705, "logits/real": 2.6787030696868896, "logps/generated": -647.2530517578125, "logps/real": -1589.8712158203125, "loss": 0.0174, "rewards/accuracies": 1.0, "rewards/generated": -6.846640110015869, "rewards/margins": 10.44523811340332, "rewards/real": 3.598597288131714, "step": 710 }, { "epoch": 2.74, "learning_rate": 6.025459688826026e-08, "logits/generated": 3.155958890914917, "logits/real": 2.688934803009033, "logps/generated": -694.9849243164062, "logps/real": -1377.62548828125, "loss": 0.0185, "rewards/accuracies": 1.0, "rewards/generated": -7.8145751953125, "rewards/margins": 11.082307815551758, "rewards/real": 3.267733097076416, "step": 720 }, { "epoch": 2.78, "learning_rate": 5.954738330975955e-08, "logits/generated": 2.951596736907959, "logits/real": 2.6604816913604736, "logps/generated": -621.7342529296875, "logps/real": -1276.5732421875, "loss": 0.0178, "rewards/accuracies": 0.987500011920929, "rewards/generated": -5.571743488311768, "rewards/margins": 8.480446815490723, "rewards/real": 2.908703327178955, "step": 730 }, { "epoch": 2.82, "learning_rate": 5.8840169731258837e-08, "logits/generated": 2.979721784591675, "logits/real": 2.7603650093078613, "logps/generated": -680.7958984375, "logps/real": -1584.530029296875, "loss": 0.0171, "rewards/accuracies": 1.0, "rewards/generated": -7.498532295227051, "rewards/margins": 11.40682601928711, "rewards/real": 3.9082934856414795, "step": 740 }, { "epoch": 2.86, "learning_rate": 5.8132956152758125e-08, "logits/generated": 3.0327863693237305, "logits/real": 2.7106642723083496, "logps/generated": -674.9633178710938, "logps/real": -1397.305419921875, "loss": 0.0199, "rewards/accuracies": 0.987500011920929, "rewards/generated": -6.23696756362915, "rewards/margins": 9.843786239624023, "rewards/real": 3.6068196296691895, "step": 750 }, { "epoch": 2.9, "learning_rate": 5.742574257425742e-08, "logits/generated": 3.1310720443725586, "logits/real": 2.61232328414917, "logps/generated": -686.5684814453125, "logps/real": -1574.0673828125, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.091876983642578, "rewards/margins": 11.274415969848633, "rewards/real": 4.182539939880371, "step": 760 }, { "epoch": 2.93, "learning_rate": 5.671852899575672e-08, "logits/generated": 2.9953229427337646, "logits/real": 2.858652353286743, "logps/generated": -703.0581665039062, "logps/real": -1572.0938720703125, "loss": 0.0172, "rewards/accuracies": 0.987500011920929, "rewards/generated": -7.434505462646484, "rewards/margins": 11.167814254760742, "rewards/real": 3.733309268951416, "step": 770 }, { "epoch": 2.97, "learning_rate": 5.601131541725601e-08, "logits/generated": 3.2020938396453857, "logits/real": 2.65535831451416, "logps/generated": -668.9046630859375, "logps/real": -1516.696533203125, "loss": 0.0179, "rewards/accuracies": 1.0, "rewards/generated": -7.276400566101074, "rewards/margins": 11.022329330444336, "rewards/real": 3.7459285259246826, "step": 780 }, { "epoch": 3.01, "learning_rate": 5.5304101838755304e-08, "logits/generated": 3.178405523300171, "logits/real": 2.871461868286133, "logps/generated": -699.7593994140625, "logps/real": -1644.784423828125, "loss": 0.0139, "rewards/accuracies": 1.0, "rewards/generated": -7.41660213470459, "rewards/margins": 11.70053482055664, "rewards/real": 4.283932685852051, "step": 790 }, { "epoch": 3.05, "learning_rate": 5.459688826025459e-08, "logits/generated": 3.105088472366333, "logits/real": 2.7389116287231445, "logps/generated": -686.2722778320312, "logps/real": -1577.5899658203125, "loss": 0.0145, "rewards/accuracies": 1.0, "rewards/generated": -7.21103572845459, "rewards/margins": 11.520840644836426, "rewards/real": 4.309804439544678, "step": 800 }, { "epoch": 3.09, "learning_rate": 5.388967468175388e-08, "logits/generated": 3.111755132675171, "logits/real": 2.6383872032165527, "logps/generated": -740.4054565429688, "logps/real": -1518.056884765625, "loss": 0.0155, "rewards/accuracies": 1.0, "rewards/generated": -8.46458625793457, "rewards/margins": 12.396316528320312, "rewards/real": 3.9317307472229004, "step": 810 }, { "epoch": 3.12, "learning_rate": 5.318246110325318e-08, "logits/generated": 3.1267387866973877, "logits/real": 2.761569023132324, "logps/generated": -690.6373291015625, "logps/real": -1433.797119140625, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/generated": -7.513435363769531, "rewards/margins": 11.60542106628418, "rewards/real": 4.091986179351807, "step": 820 }, { "epoch": 3.16, "learning_rate": 5.2475247524752476e-08, "logits/generated": 3.094365358352661, "logits/real": 2.7339248657226562, "logps/generated": -677.4999389648438, "logps/real": -1461.193603515625, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/generated": -7.2015275955200195, "rewards/margins": 10.88963508605957, "rewards/real": 3.68810772895813, "step": 830 }, { "epoch": 3.2, "learning_rate": 5.1768033946251764e-08, "logits/generated": 3.1627821922302246, "logits/real": 2.6869568824768066, "logps/generated": -708.0393676757812, "logps/real": -1620.342529296875, "loss": 0.013, "rewards/accuracies": 1.0, "rewards/generated": -6.853402614593506, "rewards/margins": 11.502897262573242, "rewards/real": 4.649496078491211, "step": 840 }, { "epoch": 3.24, "learning_rate": 5.106082036775106e-08, "logits/generated": 2.925158977508545, "logits/real": 2.806152820587158, "logps/generated": -670.198486328125, "logps/real": -1649.2200927734375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -7.177328586578369, "rewards/margins": 11.58249568939209, "rewards/real": 4.405167102813721, "step": 850 }, { "epoch": 3.28, "learning_rate": 5.035360678925035e-08, "logits/generated": 3.0794711112976074, "logits/real": 2.7551910877227783, "logps/generated": -688.251708984375, "logps/real": -1551.3802490234375, "loss": 0.0092, "rewards/accuracies": 1.0, "rewards/generated": -7.524941921234131, "rewards/margins": 11.831631660461426, "rewards/real": 4.306689262390137, "step": 860 }, { "epoch": 3.31, "learning_rate": 4.964639321074964e-08, "logits/generated": 3.2023205757141113, "logits/real": 2.7396531105041504, "logps/generated": -726.21484375, "logps/real": -1432.2078857421875, "loss": 0.0096, "rewards/accuracies": 1.0, "rewards/generated": -8.621014595031738, "rewards/margins": 12.233930587768555, "rewards/real": 3.6129164695739746, "step": 870 }, { "epoch": 3.35, "learning_rate": 4.8939179632248937e-08, "logits/generated": 3.036933422088623, "logits/real": 2.7342514991760254, "logps/generated": -679.8505859375, "logps/real": -1413.323974609375, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/generated": -7.424284934997559, "rewards/margins": 11.5066499710083, "rewards/real": 4.082363605499268, "step": 880 }, { "epoch": 3.39, "learning_rate": 4.823196605374823e-08, "logits/generated": 3.1470329761505127, "logits/real": 2.7851274013519287, "logps/generated": -707.0967407226562, "logps/real": -1301.5491943359375, "loss": 0.0129, "rewards/accuracies": 1.0, "rewards/generated": -7.919151306152344, "rewards/margins": 11.449126243591309, "rewards/real": 3.529975175857544, "step": 890 }, { "epoch": 3.43, "learning_rate": 4.752475247524752e-08, "logits/generated": 3.122591018676758, "logits/real": 2.899055242538452, "logps/generated": -719.7399291992188, "logps/real": -1345.24267578125, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/generated": -8.44778060913086, "rewards/margins": 12.348614692687988, "rewards/real": 3.9008331298828125, "step": 900 }, { "epoch": 3.47, "learning_rate": 4.6817538896746814e-08, "logits/generated": 3.1257503032684326, "logits/real": 2.7537941932678223, "logps/generated": -675.872314453125, "logps/real": -1462.6578369140625, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/generated": -7.721535682678223, "rewards/margins": 11.623268127441406, "rewards/real": 3.901732921600342, "step": 910 }, { "epoch": 3.5, "learning_rate": 4.611032531824611e-08, "logits/generated": 3.1477560997009277, "logits/real": 2.8998966217041016, "logps/generated": -659.6651611328125, "logps/real": -1254.7550048828125, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/generated": -6.890495300292969, "rewards/margins": 10.485551834106445, "rewards/real": 3.595057725906372, "step": 920 }, { "epoch": 3.54, "learning_rate": 4.5403111739745404e-08, "logits/generated": 3.0974771976470947, "logits/real": 2.7432289123535156, "logps/generated": -679.0161743164062, "logps/real": -1514.939453125, "loss": 0.0153, "rewards/accuracies": 0.987500011920929, "rewards/generated": -8.148828506469727, "rewards/margins": 12.779142379760742, "rewards/real": 4.630312919616699, "step": 930 }, { "epoch": 3.58, "learning_rate": 4.469589816124469e-08, "logits/generated": 3.1638052463531494, "logits/real": 2.8109536170959473, "logps/generated": -698.923828125, "logps/real": -1472.047119140625, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/generated": -7.732884407043457, "rewards/margins": 11.864561080932617, "rewards/real": 4.13167667388916, "step": 940 }, { "epoch": 3.62, "learning_rate": 4.3988684582743986e-08, "logits/generated": 3.1099958419799805, "logits/real": 2.689439296722412, "logps/generated": -644.0321044921875, "logps/real": -1493.985107421875, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/generated": -7.19980001449585, "rewards/margins": 11.551348686218262, "rewards/real": 4.351548194885254, "step": 950 }, { "epoch": 3.66, "learning_rate": 4.328147100424328e-08, "logits/generated": 3.068477153778076, "logits/real": 2.7735862731933594, "logps/generated": -691.45166015625, "logps/real": -1532.789306640625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/generated": -7.632587432861328, "rewards/margins": 12.373538970947266, "rewards/real": 4.740950584411621, "step": 960 }, { "epoch": 3.7, "learning_rate": 4.257425742574257e-08, "logits/generated": 3.0876381397247314, "logits/real": 2.819197177886963, "logps/generated": -723.0416259765625, "logps/real": -1523.8883056640625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -8.482768058776855, "rewards/margins": 12.97950553894043, "rewards/real": 4.496739387512207, "step": 970 }, { "epoch": 3.73, "learning_rate": 4.186704384724187e-08, "logits/generated": 3.1232168674468994, "logits/real": 2.7532286643981934, "logps/generated": -705.3154296875, "logps/real": -1505.3544921875, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -9.721161842346191, "rewards/margins": 14.407315254211426, "rewards/real": 4.686154365539551, "step": 980 }, { "epoch": 3.77, "learning_rate": 4.115983026874116e-08, "logits/generated": 3.03477144241333, "logits/real": 2.8783843517303467, "logps/generated": -643.8148803710938, "logps/real": -1341.3072509765625, "loss": 0.0125, "rewards/accuracies": 1.0, "rewards/generated": -8.113399505615234, "rewards/margins": 12.22516918182373, "rewards/real": 4.111769199371338, "step": 990 }, { "epoch": 3.81, "learning_rate": 4.045261669024045e-08, "logits/generated": 3.1741697788238525, "logits/real": 2.8709182739257812, "logps/generated": -732.4049682617188, "logps/real": -1317.285400390625, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/generated": -9.147187232971191, "rewards/margins": 12.971673965454102, "rewards/real": 3.8244857788085938, "step": 1000 }, { "epoch": 3.85, "learning_rate": 3.974540311173974e-08, "logits/generated": 3.0487265586853027, "logits/real": 2.70530366897583, "logps/generated": -710.79931640625, "logps/real": -1459.953125, "loss": 0.0078, "rewards/accuracies": 1.0, "rewards/generated": -8.931787490844727, "rewards/margins": 12.98430347442627, "rewards/real": 4.052516460418701, "step": 1010 }, { "epoch": 3.89, "learning_rate": 3.9038189533239036e-08, "logits/generated": 3.2147929668426514, "logits/real": 2.8529582023620605, "logps/generated": -705.3197021484375, "logps/real": -1510.0345458984375, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/generated": -9.200128555297852, "rewards/margins": 13.442059516906738, "rewards/real": 4.241931438446045, "step": 1020 }, { "epoch": 3.92, "learning_rate": 3.833097595473833e-08, "logits/generated": 3.0952019691467285, "logits/real": 2.843813419342041, "logps/generated": -767.7200317382812, "logps/real": -1536.8294677734375, "loss": 0.007, "rewards/accuracies": 1.0, "rewards/generated": -9.248136520385742, "rewards/margins": 14.309036254882812, "rewards/real": 5.06089973449707, "step": 1030 }, { "epoch": 3.96, "learning_rate": 3.762376237623762e-08, "logits/generated": 3.064669609069824, "logits/real": 2.8759753704071045, "logps/generated": -747.2686767578125, "logps/real": -1288.828857421875, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/generated": -9.522977828979492, "rewards/margins": 13.202028274536133, "rewards/real": 3.679051637649536, "step": 1040 }, { "epoch": 4.0, "learning_rate": 3.6916548797736914e-08, "logits/generated": 3.093175172805786, "logits/real": 2.7089123725891113, "logps/generated": -727.3369750976562, "logps/real": -1490.734619140625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -9.388141632080078, "rewards/margins": 14.153898239135742, "rewards/real": 4.765757083892822, "step": 1050 }, { "epoch": 4.04, "learning_rate": 3.620933521923621e-08, "logits/generated": 3.0252323150634766, "logits/real": 2.6994900703430176, "logps/generated": -706.836181640625, "logps/real": -1605.043701171875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/generated": -8.568842887878418, "rewards/margins": 13.715721130371094, "rewards/real": 5.146877288818359, "step": 1060 }, { "epoch": 4.08, "learning_rate": 3.55021216407355e-08, "logits/generated": 3.1279215812683105, "logits/real": 2.813671588897705, "logps/generated": -725.0180053710938, "logps/real": -1311.329833984375, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -9.17328929901123, "rewards/margins": 13.425753593444824, "rewards/real": 4.252464294433594, "step": 1070 }, { "epoch": 4.11, "learning_rate": 3.47949080622348e-08, "logits/generated": 3.1973278522491455, "logits/real": 2.841034412384033, "logps/generated": -739.7282104492188, "logps/real": -1421.683349609375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -9.601125717163086, "rewards/margins": 13.759020805358887, "rewards/real": 4.157895088195801, "step": 1080 }, { "epoch": 4.15, "learning_rate": 3.4087694483734086e-08, "logits/generated": 3.14319109916687, "logits/real": 2.650394916534424, "logps/generated": -694.9728393554688, "logps/real": -1557.013671875, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -8.979018211364746, "rewards/margins": 14.022770881652832, "rewards/real": 5.043752193450928, "step": 1090 }, { "epoch": 4.19, "learning_rate": 3.3380480905233374e-08, "logits/generated": 3.283674716949463, "logits/real": 2.774038791656494, "logps/generated": -750.1487426757812, "logps/real": -1579.89697265625, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/generated": -10.157815933227539, "rewards/margins": 14.974313735961914, "rewards/real": 4.816495895385742, "step": 1100 }, { "epoch": 4.23, "learning_rate": 3.2673267326732676e-08, "logits/generated": 3.134382963180542, "logits/real": 2.8532094955444336, "logps/generated": -750.8877563476562, "logps/real": -1525.961669921875, "loss": 0.0077, "rewards/accuracies": 1.0, "rewards/generated": -9.825960159301758, "rewards/margins": 14.588778495788574, "rewards/real": 4.762818336486816, "step": 1110 }, { "epoch": 4.27, "learning_rate": 3.1966053748231964e-08, "logits/generated": 3.130510091781616, "logits/real": 2.9025044441223145, "logps/generated": -675.0423583984375, "logps/real": -1538.1998291015625, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/generated": -9.219243049621582, "rewards/margins": 14.35087776184082, "rewards/real": 5.131636619567871, "step": 1120 }, { "epoch": 4.3, "learning_rate": 3.125884016973126e-08, "logits/generated": 3.234394073486328, "logits/real": 2.7886247634887695, "logps/generated": -688.5629272460938, "logps/real": -1481.4114990234375, "loss": 0.0074, "rewards/accuracies": 1.0, "rewards/generated": -8.884227752685547, "rewards/margins": 13.749975204467773, "rewards/real": 4.865747928619385, "step": 1130 }, { "epoch": 4.34, "learning_rate": 3.0551626591230553e-08, "logits/generated": 3.151052474975586, "logits/real": 2.8268675804138184, "logps/generated": -702.4296264648438, "logps/real": -1545.9931640625, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -8.792219161987305, "rewards/margins": 13.802957534790039, "rewards/real": 5.010737419128418, "step": 1140 }, { "epoch": 4.38, "learning_rate": 2.984441301272984e-08, "logits/generated": 3.0674405097961426, "logits/real": 2.791660785675049, "logps/generated": -728.8613891601562, "logps/real": -1402.8472900390625, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -9.552156448364258, "rewards/margins": 13.963113784790039, "rewards/real": 4.410956382751465, "step": 1150 }, { "epoch": 4.42, "learning_rate": 2.9137199434229136e-08, "logits/generated": 3.085538864135742, "logits/real": 2.8696396350860596, "logps/generated": -699.1375732421875, "logps/real": -1528.8197021484375, "loss": 0.0061, "rewards/accuracies": 1.0, "rewards/generated": -9.45177173614502, "rewards/margins": 14.307217597961426, "rewards/real": 4.8554463386535645, "step": 1160 }, { "epoch": 4.46, "learning_rate": 2.8429985855728428e-08, "logits/generated": 3.1769864559173584, "logits/real": 2.864468574523926, "logps/generated": -730.0369262695312, "logps/real": -1405.126220703125, "loss": 0.006, "rewards/accuracies": 1.0, "rewards/generated": -10.242330551147461, "rewards/margins": 14.709861755371094, "rewards/real": 4.467529296875, "step": 1170 }, { "epoch": 4.5, "learning_rate": 2.7722772277227722e-08, "logits/generated": 3.0584521293640137, "logits/real": 2.7300102710723877, "logps/generated": -722.6023559570312, "logps/real": -1533.9681396484375, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/generated": -10.0105562210083, "rewards/margins": 14.953516960144043, "rewards/real": 4.942961692810059, "step": 1180 }, { "epoch": 4.53, "learning_rate": 2.7015558698727014e-08, "logits/generated": 3.1814303398132324, "logits/real": 2.767988681793213, "logps/generated": -705.47021484375, "logps/real": -1517.984619140625, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -10.146055221557617, "rewards/margins": 15.356274604797363, "rewards/real": 5.210217475891113, "step": 1190 }, { "epoch": 4.57, "learning_rate": 2.6308345120226305e-08, "logits/generated": 3.2221198081970215, "logits/real": 2.8411731719970703, "logps/generated": -727.7474365234375, "logps/real": -1452.932373046875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -10.274625778198242, "rewards/margins": 15.288644790649414, "rewards/real": 5.014019012451172, "step": 1200 }, { "epoch": 4.61, "learning_rate": 2.56011315417256e-08, "logits/generated": 2.9806618690490723, "logits/real": 2.761728286743164, "logps/generated": -739.073974609375, "logps/real": -1438.574951171875, "loss": 0.0054, "rewards/accuracies": 1.0, "rewards/generated": -10.485044479370117, "rewards/margins": 15.389866828918457, "rewards/real": 4.904819965362549, "step": 1210 }, { "epoch": 4.65, "learning_rate": 2.489391796322489e-08, "logits/generated": 3.0833356380462646, "logits/real": 2.8559961318969727, "logps/generated": -737.3714599609375, "logps/real": -1319.4708251953125, "loss": 0.0088, "rewards/accuracies": 0.987500011920929, "rewards/generated": -9.90731430053711, "rewards/margins": 14.22021484375, "rewards/real": 4.312900066375732, "step": 1220 }, { "epoch": 4.69, "learning_rate": 2.4186704384724186e-08, "logits/generated": 3.1098473072052, "logits/real": 2.8987298011779785, "logps/generated": -751.8609619140625, "logps/real": -1390.7666015625, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -10.742658615112305, "rewards/margins": 15.709897994995117, "rewards/real": 4.967240810394287, "step": 1230 }, { "epoch": 4.72, "learning_rate": 2.3479490806223478e-08, "logits/generated": 3.181952476501465, "logits/real": 2.8264319896698, "logps/generated": -725.786865234375, "logps/real": -1668.9781494140625, "loss": 0.0057, "rewards/accuracies": 1.0, "rewards/generated": -9.877522468566895, "rewards/margins": 15.596285820007324, "rewards/real": 5.7187628746032715, "step": 1240 }, { "epoch": 4.76, "learning_rate": 2.2772277227722772e-08, "logits/generated": 3.2247843742370605, "logits/real": 2.787977695465088, "logps/generated": -689.7197265625, "logps/real": -1527.9981689453125, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -9.805652618408203, "rewards/margins": 14.870402336120605, "rewards/real": 5.064749240875244, "step": 1250 }, { "epoch": 4.8, "learning_rate": 2.2065063649222067e-08, "logits/generated": 3.1002349853515625, "logits/real": 2.8746185302734375, "logps/generated": -755.68994140625, "logps/real": -1484.486328125, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -9.283646583557129, "rewards/margins": 14.29266357421875, "rewards/real": 5.009018421173096, "step": 1260 }, { "epoch": 4.84, "learning_rate": 2.1357850070721355e-08, "logits/generated": 3.172830104827881, "logits/real": 2.891188859939575, "logps/generated": -711.9218139648438, "logps/real": -1525.380615234375, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/generated": -9.644311904907227, "rewards/margins": 14.991864204406738, "rewards/real": 5.3475518226623535, "step": 1270 }, { "epoch": 4.88, "learning_rate": 2.065063649222065e-08, "logits/generated": 3.162585735321045, "logits/real": 2.814383029937744, "logps/generated": -691.09375, "logps/real": -1261.48046875, "loss": 0.0063, "rewards/accuracies": 1.0, "rewards/generated": -8.76778793334961, "rewards/margins": 13.0126953125, "rewards/real": 4.244906425476074, "step": 1280 }, { "epoch": 4.91, "learning_rate": 1.994342291371994e-08, "logits/generated": 3.1490421295166016, "logits/real": 2.8507959842681885, "logps/generated": -685.4339599609375, "logps/real": -1518.389404296875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -9.577953338623047, "rewards/margins": 14.928466796875, "rewards/real": 5.350512504577637, "step": 1290 }, { "epoch": 4.95, "learning_rate": 1.9236209335219236e-08, "logits/generated": 3.1204447746276855, "logits/real": 2.761258125305176, "logps/generated": -718.8822021484375, "logps/real": -1507.6552734375, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/generated": -10.944616317749023, "rewards/margins": 15.193156242370605, "rewards/real": 4.248539447784424, "step": 1300 }, { "epoch": 4.99, "learning_rate": 1.8528995756718528e-08, "logits/generated": 3.202990770339966, "logits/real": 2.789196014404297, "logps/generated": -736.8391723632812, "logps/real": -1285.6285400390625, "loss": 0.0069, "rewards/accuracies": 1.0, "rewards/generated": -9.708449363708496, "rewards/margins": 14.332748413085938, "rewards/real": 4.624300003051758, "step": 1310 }, { "epoch": 5.03, "learning_rate": 1.782178217821782e-08, "logits/generated": 3.044565200805664, "logits/real": 2.813560724258423, "logps/generated": -749.41845703125, "logps/real": -1450.473876953125, "loss": 0.0039, "rewards/accuracies": 1.0, "rewards/generated": -9.962289810180664, "rewards/margins": 15.018086433410645, "rewards/real": 5.055794715881348, "step": 1320 }, { "epoch": 5.07, "learning_rate": 1.7114568599717114e-08, "logits/generated": 3.1547980308532715, "logits/real": 2.8710808753967285, "logps/generated": -685.3002319335938, "logps/real": -1505.046875, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -10.081647872924805, "rewards/margins": 15.215327262878418, "rewards/real": 5.133678913116455, "step": 1330 }, { "epoch": 5.1, "learning_rate": 1.640735502121641e-08, "logits/generated": 3.111386299133301, "logits/real": 2.878361701965332, "logps/generated": -712.4747924804688, "logps/real": -1361.859375, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -9.593215942382812, "rewards/margins": 13.842330932617188, "rewards/real": 4.249114990234375, "step": 1340 }, { "epoch": 5.14, "learning_rate": 1.57001414427157e-08, "logits/generated": 3.1498868465423584, "logits/real": 2.785670518875122, "logps/generated": -749.1596069335938, "logps/real": -1366.0186767578125, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/generated": -10.603869438171387, "rewards/margins": 15.060259819030762, "rewards/real": 4.45639181137085, "step": 1350 }, { "epoch": 5.18, "learning_rate": 1.499292786421499e-08, "logits/generated": 3.2311694622039795, "logits/real": 2.8071227073669434, "logps/generated": -698.42333984375, "logps/real": -1556.6697998046875, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -10.801340103149414, "rewards/margins": 16.656749725341797, "rewards/real": 5.855408668518066, "step": 1360 }, { "epoch": 5.22, "learning_rate": 1.4285714285714284e-08, "logits/generated": 3.204010486602783, "logits/real": 2.8270351886749268, "logps/generated": -768.1214599609375, "logps/real": -1522.0247802734375, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -11.407632827758789, "rewards/margins": 16.46245574951172, "rewards/real": 5.054823875427246, "step": 1370 }, { "epoch": 5.26, "learning_rate": 1.3578500707213577e-08, "logits/generated": 3.2223219871520996, "logits/real": 2.748478412628174, "logps/generated": -709.1806640625, "logps/real": -1449.1051025390625, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/generated": -10.24437427520752, "rewards/margins": 15.332929611206055, "rewards/real": 5.088555335998535, "step": 1380 }, { "epoch": 5.3, "learning_rate": 1.287128712871287e-08, "logits/generated": 2.949439525604248, "logits/real": 2.8462400436401367, "logps/generated": -702.388427734375, "logps/real": -1221.220458984375, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/generated": -9.625406265258789, "rewards/margins": 13.894259452819824, "rewards/real": 4.268852710723877, "step": 1390 }, { "epoch": 5.33, "learning_rate": 1.2164073550212164e-08, "logits/generated": 3.1298446655273438, "logits/real": 2.8281898498535156, "logps/generated": -750.8180541992188, "logps/real": -1293.175048828125, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/generated": -10.595232009887695, "rewards/margins": 15.49177360534668, "rewards/real": 4.896543025970459, "step": 1400 }, { "epoch": 5.37, "learning_rate": 1.1456859971711457e-08, "logits/generated": 3.046416759490967, "logits/real": 2.7686657905578613, "logps/generated": -703.9090576171875, "logps/real": -1550.712158203125, "loss": 0.005, "rewards/accuracies": 1.0, "rewards/generated": -9.808374404907227, "rewards/margins": 15.086359024047852, "rewards/real": 5.277985572814941, "step": 1410 }, { "epoch": 5.41, "learning_rate": 1.074964639321075e-08, "logits/generated": 3.1554598808288574, "logits/real": 2.8488306999206543, "logps/generated": -740.9146118164062, "logps/real": -1494.5152587890625, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -10.964345932006836, "rewards/margins": 16.294296264648438, "rewards/real": 5.329949378967285, "step": 1420 }, { "epoch": 5.45, "learning_rate": 1.0042432814710041e-08, "logits/generated": 3.15816330909729, "logits/real": 2.857891082763672, "logps/generated": -700.6273803710938, "logps/real": -1395.680419921875, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/generated": -10.415566444396973, "rewards/margins": 14.575197219848633, "rewards/real": 4.15963077545166, "step": 1430 }, { "epoch": 5.49, "learning_rate": 9.335219236209336e-09, "logits/generated": 3.150120496749878, "logits/real": 2.801614761352539, "logps/generated": -702.9065551757812, "logps/real": -1336.076904296875, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/generated": -10.592188835144043, "rewards/margins": 15.421045303344727, "rewards/real": 4.828855037689209, "step": 1440 }, { "epoch": 5.52, "learning_rate": 8.628005657708627e-09, "logits/generated": 3.075422525405884, "logits/real": 2.759566307067871, "logps/generated": -750.0064086914062, "logps/real": -1322.994140625, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/generated": -11.502080917358398, "rewards/margins": 16.186368942260742, "rewards/real": 4.68428897857666, "step": 1450 }, { "epoch": 5.56, "learning_rate": 7.92079207920792e-09, "logits/generated": 2.975106716156006, "logits/real": 2.8558712005615234, "logps/generated": -730.2501831054688, "logps/real": -1330.128173828125, "loss": 0.0043, "rewards/accuracies": 1.0, "rewards/generated": -10.35800552368164, "rewards/margins": 14.964300155639648, "rewards/real": 4.60629415512085, "step": 1460 }, { "epoch": 5.6, "learning_rate": 7.2135785007072135e-09, "logits/generated": 3.338933229446411, "logits/real": 2.866070032119751, "logps/generated": -702.8323974609375, "logps/real": -1660.9990234375, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/generated": -10.770402908325195, "rewards/margins": 16.416067123413086, "rewards/real": 5.645665645599365, "step": 1470 }, { "epoch": 5.64, "learning_rate": 6.506364922206506e-09, "logits/generated": 3.193213701248169, "logits/real": 2.8589539527893066, "logps/generated": -730.3358154296875, "logps/real": -1374.8104248046875, "loss": 0.0053, "rewards/accuracies": 1.0, "rewards/generated": -10.911657333374023, "rewards/margins": 15.75550651550293, "rewards/real": 4.843849182128906, "step": 1480 }, { "epoch": 5.68, "learning_rate": 5.799151343705799e-09, "logits/generated": 3.0994668006896973, "logits/real": 2.7762365341186523, "logps/generated": -667.8460693359375, "logps/real": -1382.4453125, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -9.389671325683594, "rewards/margins": 14.397994995117188, "rewards/real": 5.008322238922119, "step": 1490 }, { "epoch": 5.71, "learning_rate": 5.091937765205092e-09, "logits/generated": 3.0917019844055176, "logits/real": 2.8576996326446533, "logps/generated": -709.4324340820312, "logps/real": -1651.778076171875, "loss": 0.0048, "rewards/accuracies": 1.0, "rewards/generated": -10.680952072143555, "rewards/margins": 16.81113052368164, "rewards/real": 6.130180358886719, "step": 1500 }, { "epoch": 5.75, "learning_rate": 4.384724186704385e-09, "logits/generated": 3.144845724105835, "logits/real": 2.8753390312194824, "logps/generated": -750.3989868164062, "logps/real": -1244.6375732421875, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/generated": -11.346524238586426, "rewards/margins": 16.04071807861328, "rewards/real": 4.694192409515381, "step": 1510 }, { "epoch": 5.79, "learning_rate": 3.6775106082036773e-09, "logits/generated": 3.173389196395874, "logits/real": 2.8000612258911133, "logps/generated": -695.5457763671875, "logps/real": -1390.158935546875, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/generated": -9.513983726501465, "rewards/margins": 14.579229354858398, "rewards/real": 5.065245151519775, "step": 1520 }, { "epoch": 5.83, "learning_rate": 2.97029702970297e-09, "logits/generated": 3.1202340126037598, "logits/real": 2.816042900085449, "logps/generated": -763.26806640625, "logps/real": -1385.561279296875, "loss": 0.0052, "rewards/accuracies": 1.0, "rewards/generated": -11.074748039245605, "rewards/margins": 16.243824005126953, "rewards/real": 5.169075965881348, "step": 1530 }, { "epoch": 5.87, "learning_rate": 2.263083451202263e-09, "logits/generated": 3.286569118499756, "logits/real": 2.9330081939697266, "logps/generated": -684.3175659179688, "logps/real": -1652.4827880859375, "loss": 0.0044, "rewards/accuracies": 1.0, "rewards/generated": -10.155891418457031, "rewards/margins": 15.940603256225586, "rewards/real": 5.784712314605713, "step": 1540 }, { "epoch": 5.9, "learning_rate": 1.5558698727015557e-09, "logits/generated": 3.17760968208313, "logits/real": 2.854255199432373, "logps/generated": -746.004150390625, "logps/real": -1345.9742431640625, "loss": 0.0038, "rewards/accuracies": 1.0, "rewards/generated": -11.761791229248047, "rewards/margins": 16.628210067749023, "rewards/real": 4.86641788482666, "step": 1550 }, { "epoch": 5.94, "learning_rate": 8.486562942008486e-10, "logits/generated": 3.056246280670166, "logits/real": 2.8659956455230713, "logps/generated": -775.7562255859375, "logps/real": -1462.85009765625, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/generated": -11.534282684326172, "rewards/margins": 16.907535552978516, "rewards/real": 5.3732523918151855, "step": 1560 }, { "epoch": 5.98, "learning_rate": 1.4144271570014144e-10, "logits/generated": 3.131124496459961, "logits/real": 2.8286118507385254, "logps/generated": -742.1873779296875, "logps/real": -1465.8543701171875, "loss": 0.0037, "rewards/accuracies": 1.0, "rewards/generated": -10.972893714904785, "rewards/margins": 16.086231231689453, "rewards/real": 5.113335609436035, "step": 1570 }, { "epoch": 5.99, "step": 1572, "total_flos": 0.0, "train_loss": 0.057728804025485636, "train_runtime": 21391.1697, "train_samples_per_second": 4.712, "train_steps_per_second": 0.073 } ], "logging_steps": 10, "max_steps": 1572, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }