|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.988571428571428, |
|
"eval_steps": 100, |
|
"global_step": 1572, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.329113924050632e-10, |
|
"logits/generated": 3.334580421447754, |
|
"logits/real": 2.520763397216797, |
|
"logps/generated": -653.1103515625, |
|
"logps/real": -1322.83154296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 6.329113924050633e-09, |
|
"logits/generated": 2.984175682067871, |
|
"logits/real": 2.4933767318725586, |
|
"logps/generated": -569.99365234375, |
|
"logps/real": -1401.09375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.4027777910232544, |
|
"rewards/generated": 0.001990392105653882, |
|
"rewards/margins": 0.003700366010889411, |
|
"rewards/real": 0.005690759979188442, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.2658227848101265e-08, |
|
"logits/generated": 3.1899852752685547, |
|
"logits/real": 2.5960335731506348, |
|
"logps/generated": -639.3368530273438, |
|
"logps/real": -1633.5921630859375, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/generated": -0.03260010853409767, |
|
"rewards/margins": 0.02890392579138279, |
|
"rewards/real": -0.0036961850710213184, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.89873417721519e-08, |
|
"logits/generated": 3.1001484394073486, |
|
"logits/real": 2.5820600986480713, |
|
"logps/generated": -625.9615478515625, |
|
"logps/real": -1550.431884765625, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/generated": -0.0366821363568306, |
|
"rewards/margins": 0.0638287365436554, |
|
"rewards/real": 0.0271465964615345, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.531645569620253e-08, |
|
"logits/generated": 2.8814804553985596, |
|
"logits/real": 2.6468660831451416, |
|
"logps/generated": -592.5699462890625, |
|
"logps/real": -1527.6875, |
|
"loss": 0.6267, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/generated": -0.08724664151668549, |
|
"rewards/margins": 0.15261340141296387, |
|
"rewards/real": 0.06536674499511719, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 3.1645569620253166e-08, |
|
"logits/generated": 3.147773265838623, |
|
"logits/real": 2.535146713256836, |
|
"logps/generated": -688.0377807617188, |
|
"logps/real": -1441.314453125, |
|
"loss": 0.5423, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/generated": -0.17641791701316833, |
|
"rewards/margins": 0.3502033054828644, |
|
"rewards/real": 0.17378540337085724, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.79746835443038e-08, |
|
"logits/generated": 3.1963257789611816, |
|
"logits/real": 2.5462851524353027, |
|
"logps/generated": -594.2725219726562, |
|
"logps/real": -1453.6497802734375, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/generated": -0.27025485038757324, |
|
"rewards/margins": 0.4667099416255951, |
|
"rewards/real": 0.19645504653453827, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.430379746835442e-08, |
|
"logits/generated": 2.942413806915283, |
|
"logits/real": 2.5651955604553223, |
|
"logps/generated": -623.3458251953125, |
|
"logps/real": -1351.8724365234375, |
|
"loss": 0.4366, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/generated": -0.48640790581703186, |
|
"rewards/margins": 0.7007172703742981, |
|
"rewards/real": 0.21430937945842743, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.063291139240506e-08, |
|
"logits/generated": 3.1414570808410645, |
|
"logits/real": 2.5161662101745605, |
|
"logps/generated": -623.8413696289062, |
|
"logps/real": -1677.4615478515625, |
|
"loss": 0.3137, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -0.7329307794570923, |
|
"rewards/margins": 1.239257574081421, |
|
"rewards/real": 0.5063267350196838, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 5.69620253164557e-08, |
|
"logits/generated": 3.070830821990967, |
|
"logits/real": 2.5541062355041504, |
|
"logps/generated": -657.6233520507812, |
|
"logps/real": -1626.2838134765625, |
|
"loss": 0.2947, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.0010322332382202, |
|
"rewards/margins": 1.5189402103424072, |
|
"rewards/real": 0.517907977104187, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 6.329113924050633e-08, |
|
"logits/generated": 2.870387315750122, |
|
"logits/real": 2.7046616077423096, |
|
"logps/generated": -579.2174072265625, |
|
"logps/real": -1487.188232421875, |
|
"loss": 0.2589, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.0812404155731201, |
|
"rewards/margins": 1.698075532913208, |
|
"rewards/real": 0.6168351173400879, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 6.962025316455696e-08, |
|
"logits/generated": 2.9290151596069336, |
|
"logits/real": 2.728487014770508, |
|
"logps/generated": -648.2110595703125, |
|
"logps/real": -1627.390869140625, |
|
"loss": 0.2453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.244185209274292, |
|
"rewards/margins": 1.9264612197875977, |
|
"rewards/real": 0.6822759509086609, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 7.59493670886076e-08, |
|
"logits/generated": 2.9588751792907715, |
|
"logits/real": 2.5211470127105713, |
|
"logps/generated": -652.1360473632812, |
|
"logps/real": -1250.042236328125, |
|
"loss": 0.2277, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -1.391522765159607, |
|
"rewards/margins": 1.8222811222076416, |
|
"rewards/real": 0.43075838685035706, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 8.227848101265823e-08, |
|
"logits/generated": 3.0586698055267334, |
|
"logits/real": 2.4790332317352295, |
|
"logps/generated": -611.0165405273438, |
|
"logps/real": -1484.681884765625, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -1.7131977081298828, |
|
"rewards/margins": 2.333926200866699, |
|
"rewards/real": 0.6207287907600403, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 8.860759493670885e-08, |
|
"logits/generated": 2.900022506713867, |
|
"logits/real": 2.670552968978882, |
|
"logps/generated": -655.8267822265625, |
|
"logps/real": -1645.905029296875, |
|
"loss": 0.1467, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.051678419113159, |
|
"rewards/margins": 2.9112563133239746, |
|
"rewards/real": 0.8595778346061707, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.493670886075948e-08, |
|
"logits/generated": 3.083251476287842, |
|
"logits/real": 2.547938108444214, |
|
"logps/generated": -647.1588134765625, |
|
"logps/real": -1623.0638427734375, |
|
"loss": 0.122, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.5622642040252686, |
|
"rewards/margins": 3.50146746635437, |
|
"rewards/real": 0.9392032623291016, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.985855728429985e-08, |
|
"logits/generated": 3.189711809158325, |
|
"logits/real": 2.609210729598999, |
|
"logps/generated": -673.2716064453125, |
|
"logps/real": -1431.1510009765625, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.6634578704833984, |
|
"rewards/margins": 3.5925452709198, |
|
"rewards/real": 0.9290875196456909, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 9.915134370579915e-08, |
|
"logits/generated": 2.796431064605713, |
|
"logits/real": 2.6858787536621094, |
|
"logps/generated": -623.2662353515625, |
|
"logps/real": -1411.96533203125, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -2.4235339164733887, |
|
"rewards/margins": 3.487780809402466, |
|
"rewards/real": 1.0642468929290771, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 9.844413012729844e-08, |
|
"logits/generated": 2.992323875427246, |
|
"logits/real": 2.6532320976257324, |
|
"logps/generated": -626.7801513671875, |
|
"logps/real": -1628.442626953125, |
|
"loss": 0.1009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.747467517852783, |
|
"rewards/margins": 4.025119781494141, |
|
"rewards/real": 1.2776525020599365, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 9.773691654879774e-08, |
|
"logits/generated": 3.0291736125946045, |
|
"logits/real": 2.5113823413848877, |
|
"logps/generated": -576.511474609375, |
|
"logps/real": -1578.1036376953125, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -2.7749691009521484, |
|
"rewards/margins": 3.916727066040039, |
|
"rewards/real": 1.1417579650878906, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.702970297029703e-08, |
|
"logits/generated": 2.8259243965148926, |
|
"logits/real": 2.727518081665039, |
|
"logps/generated": -609.0852661132812, |
|
"logps/real": -1438.0333251953125, |
|
"loss": 0.0846, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.6536800861358643, |
|
"rewards/margins": 3.9251868724823, |
|
"rewards/real": 1.2715070247650146, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.632248939179631e-08, |
|
"logits/generated": 3.0957698822021484, |
|
"logits/real": 2.5199692249298096, |
|
"logps/generated": -674.4405517578125, |
|
"logps/real": -1574.9976806640625, |
|
"loss": 0.0784, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.413438320159912, |
|
"rewards/margins": 4.9110894203186035, |
|
"rewards/real": 1.497650384902954, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 9.561527581329562e-08, |
|
"logits/generated": 3.059602737426758, |
|
"logits/real": 2.611131429672241, |
|
"logps/generated": -657.6616821289062, |
|
"logps/real": -1342.2515869140625, |
|
"loss": 0.0751, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.3509624004364014, |
|
"rewards/margins": 4.484201908111572, |
|
"rewards/real": 1.1332390308380127, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.49080622347949e-08, |
|
"logits/generated": 2.9535973072052, |
|
"logits/real": 2.695953130722046, |
|
"logps/generated": -623.5858764648438, |
|
"logps/real": -1357.4754638671875, |
|
"loss": 0.0752, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.0820462703704834, |
|
"rewards/margins": 4.444340705871582, |
|
"rewards/real": 1.3622944355010986, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 9.420084865629419e-08, |
|
"logits/generated": 3.0728561878204346, |
|
"logits/real": 2.6514034271240234, |
|
"logps/generated": -646.6805419921875, |
|
"logps/real": -1661.2340087890625, |
|
"loss": 0.074, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -3.4602718353271484, |
|
"rewards/margins": 5.120414733886719, |
|
"rewards/real": 1.6601425409317017, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 9.349363507779348e-08, |
|
"logits/generated": 3.14707088470459, |
|
"logits/real": 2.582815647125244, |
|
"logps/generated": -659.8969116210938, |
|
"logps/real": -1602.3460693359375, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.8163421154022217, |
|
"rewards/margins": 5.412659645080566, |
|
"rewards/real": 1.5963174104690552, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.278642149929278e-08, |
|
"logits/generated": 2.910804271697998, |
|
"logits/real": 2.588214159011841, |
|
"logps/generated": -618.7315673828125, |
|
"logps/real": -1585.5712890625, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.172882556915283, |
|
"rewards/margins": 4.7417426109313965, |
|
"rewards/real": 1.5688601732254028, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 9.207920792079208e-08, |
|
"logits/generated": 3.0260536670684814, |
|
"logits/real": 2.576744556427002, |
|
"logps/generated": -712.7923583984375, |
|
"logps/real": -1611.0130615234375, |
|
"loss": 0.0658, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.007518768310547, |
|
"rewards/margins": 5.749050617218018, |
|
"rewards/real": 1.7415320873260498, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.137199434229137e-08, |
|
"logits/generated": 3.0747456550598145, |
|
"logits/real": 2.6565499305725098, |
|
"logps/generated": -677.0684814453125, |
|
"logps/real": -1437.9249267578125, |
|
"loss": 0.0598, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.153753280639648, |
|
"rewards/margins": 5.587630271911621, |
|
"rewards/real": 1.4338771104812622, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.066478076379066e-08, |
|
"logits/generated": 3.047402858734131, |
|
"logits/real": 2.692274332046509, |
|
"logps/generated": -652.3853759765625, |
|
"logps/real": -1553.9385986328125, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.262011528015137, |
|
"rewards/margins": 5.818949222564697, |
|
"rewards/real": 1.556937575340271, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 8.995756718528995e-08, |
|
"logits/generated": 2.943486452102661, |
|
"logits/real": 2.4795732498168945, |
|
"logps/generated": -604.9609375, |
|
"logps/real": -1512.994140625, |
|
"loss": 0.0622, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.6027259826660156, |
|
"rewards/margins": 5.401385307312012, |
|
"rewards/real": 1.7986586093902588, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.925035360678924e-08, |
|
"logits/generated": 2.987004518508911, |
|
"logits/real": 2.6335082054138184, |
|
"logps/generated": -626.880126953125, |
|
"logps/real": -1727.589599609375, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.473705768585205, |
|
"rewards/margins": 5.608192443847656, |
|
"rewards/real": 2.1344869136810303, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 8.854314002828854e-08, |
|
"logits/generated": 3.1043455600738525, |
|
"logits/real": 2.5045957565307617, |
|
"logps/generated": -654.8699951171875, |
|
"logps/real": -1668.8502197265625, |
|
"loss": 0.056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.9041481018066406, |
|
"rewards/margins": 5.877499580383301, |
|
"rewards/real": 1.9733517169952393, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 8.783592644978784e-08, |
|
"logits/generated": 2.9959933757781982, |
|
"logits/real": 2.833263874053955, |
|
"logps/generated": -700.354248046875, |
|
"logps/real": -1501.94775390625, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.707437515258789, |
|
"rewards/margins": 6.398648262023926, |
|
"rewards/real": 1.691210389137268, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 8.712871287128713e-08, |
|
"logits/generated": 3.1212868690490723, |
|
"logits/real": 2.434352159500122, |
|
"logps/generated": -631.5654296875, |
|
"logps/real": -1499.0816650390625, |
|
"loss": 0.056, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -3.8523108959198, |
|
"rewards/margins": 5.5262956619262695, |
|
"rewards/real": 1.6739847660064697, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 8.642149929278641e-08, |
|
"logits/generated": 3.180377244949341, |
|
"logits/real": 2.5462543964385986, |
|
"logps/generated": -658.816162109375, |
|
"logps/real": -1713.6314697265625, |
|
"loss": 0.0449, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.1651411056518555, |
|
"rewards/margins": 6.40631628036499, |
|
"rewards/real": 2.241175651550293, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.57142857142857e-08, |
|
"logits/generated": 3.1063506603240967, |
|
"logits/real": 2.604684352874756, |
|
"logps/generated": -630.5732421875, |
|
"logps/real": -1476.998779296875, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/generated": -4.010001182556152, |
|
"rewards/margins": 5.754232406616211, |
|
"rewards/real": 1.74423086643219, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.5007072135785e-08, |
|
"logits/generated": 2.880650758743286, |
|
"logits/real": 2.8358330726623535, |
|
"logps/generated": -612.5302124023438, |
|
"logps/real": -1495.0152587890625, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -4.035798072814941, |
|
"rewards/margins": 6.071022987365723, |
|
"rewards/real": 2.035224676132202, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 8.429985855728429e-08, |
|
"logits/generated": 2.8637542724609375, |
|
"logits/real": 2.558077335357666, |
|
"logps/generated": -667.6907958984375, |
|
"logps/real": -1590.078857421875, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.026558876037598, |
|
"rewards/margins": 7.185013771057129, |
|
"rewards/real": 2.158454656600952, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 8.35926449787836e-08, |
|
"logits/generated": 3.141674518585205, |
|
"logits/real": 2.5596821308135986, |
|
"logps/generated": -645.9779663085938, |
|
"logps/real": -1424.9947509765625, |
|
"loss": 0.0396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.634699821472168, |
|
"rewards/margins": 6.322070121765137, |
|
"rewards/real": 1.6873706579208374, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 8.288543140028288e-08, |
|
"logits/generated": 3.265359878540039, |
|
"logits/real": 2.5316691398620605, |
|
"logps/generated": -671.8311767578125, |
|
"logps/real": -1545.91796875, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.4212493896484375, |
|
"rewards/margins": 7.516656398773193, |
|
"rewards/real": 2.0954079627990723, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 8.217821782178217e-08, |
|
"logits/generated": 3.1239216327667236, |
|
"logits/real": 2.669434070587158, |
|
"logps/generated": -656.9334716796875, |
|
"logps/real": -1509.529541015625, |
|
"loss": 0.0422, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.773463249206543, |
|
"rewards/margins": 6.696150779724121, |
|
"rewards/real": 1.9226871728897095, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 8.147100424328147e-08, |
|
"logits/generated": 3.169534683227539, |
|
"logits/real": 2.637077808380127, |
|
"logps/generated": -701.36767578125, |
|
"logps/real": -1685.140380859375, |
|
"loss": 0.0393, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.24563455581665, |
|
"rewards/margins": 7.496856689453125, |
|
"rewards/real": 2.2512223720550537, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 8.076379066478076e-08, |
|
"logits/generated": 2.9224164485931396, |
|
"logits/real": 2.562767505645752, |
|
"logps/generated": -679.8922729492188, |
|
"logps/real": -1469.2054443359375, |
|
"loss": 0.0388, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.989293098449707, |
|
"rewards/margins": 7.009693145751953, |
|
"rewards/real": 2.020399570465088, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.005657708628005e-08, |
|
"logits/generated": 2.9800190925598145, |
|
"logits/real": 2.654110908508301, |
|
"logps/generated": -697.5772705078125, |
|
"logps/real": -1385.52099609375, |
|
"loss": 0.0376, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.243396759033203, |
|
"rewards/margins": 7.55600643157959, |
|
"rewards/real": 2.312610149383545, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.934936350777935e-08, |
|
"logits/generated": 3.078356981277466, |
|
"logits/real": 2.535681962966919, |
|
"logps/generated": -660.41943359375, |
|
"logps/real": -1552.767822265625, |
|
"loss": 0.0262, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.234507083892822, |
|
"rewards/margins": 7.48751974105835, |
|
"rewards/real": 2.2530131340026855, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 7.864214992927864e-08, |
|
"logits/generated": 2.9969305992126465, |
|
"logits/real": 2.7245428562164307, |
|
"logps/generated": -742.2290649414062, |
|
"logps/real": -1633.814453125, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.18667459487915, |
|
"rewards/margins": 7.811495780944824, |
|
"rewards/real": 2.624821186065674, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 7.793493635077794e-08, |
|
"logits/generated": 3.072664260864258, |
|
"logits/real": 2.67760968208313, |
|
"logps/generated": -669.0721435546875, |
|
"logps/real": -1515.5504150390625, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.986624717712402, |
|
"rewards/margins": 7.337203025817871, |
|
"rewards/real": 2.350578546524048, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.722772277227723e-08, |
|
"logits/generated": 3.0342142581939697, |
|
"logits/real": 2.5741496086120605, |
|
"logps/generated": -678.1422729492188, |
|
"logps/real": -1544.0634765625, |
|
"loss": 0.0307, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.9622602462768555, |
|
"rewards/margins": 8.503713607788086, |
|
"rewards/real": 2.5414538383483887, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.652050919377651e-08, |
|
"logits/generated": 3.1236672401428223, |
|
"logits/real": 2.6698803901672363, |
|
"logps/generated": -673.8851928710938, |
|
"logps/real": -1506.4398193359375, |
|
"loss": 0.0251, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.428993225097656, |
|
"rewards/margins": 8.060708999633789, |
|
"rewards/real": 2.631716251373291, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.58132956152758e-08, |
|
"logits/generated": 3.169041633605957, |
|
"logits/real": 2.631805658340454, |
|
"logps/generated": -637.0628662109375, |
|
"logps/real": -1626.2333984375, |
|
"loss": 0.0338, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.156910419464111, |
|
"rewards/margins": 7.718507289886475, |
|
"rewards/real": 2.5615973472595215, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.51060820367751e-08, |
|
"logits/generated": 3.1486570835113525, |
|
"logits/real": 2.661397933959961, |
|
"logps/generated": -655.4933471679688, |
|
"logps/real": -1476.0849609375, |
|
"loss": 0.0256, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.335035800933838, |
|
"rewards/margins": 7.7971930503845215, |
|
"rewards/real": 2.462157726287842, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 7.43988684582744e-08, |
|
"logits/generated": 2.928063154220581, |
|
"logits/real": 2.730922222137451, |
|
"logps/generated": -652.9177856445312, |
|
"logps/real": -1432.533935546875, |
|
"loss": 0.031, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.431285858154297, |
|
"rewards/margins": 8.037918090820312, |
|
"rewards/real": 2.606632709503174, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 7.36916548797737e-08, |
|
"logits/generated": 3.1943507194519043, |
|
"logits/real": 2.7164087295532227, |
|
"logps/generated": -679.414794921875, |
|
"logps/real": -1487.551025390625, |
|
"loss": 0.022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.67262601852417, |
|
"rewards/margins": 8.073965072631836, |
|
"rewards/real": 2.4013381004333496, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 7.298444130127298e-08, |
|
"logits/generated": 3.188331127166748, |
|
"logits/real": 2.6221508979797363, |
|
"logps/generated": -695.04931640625, |
|
"logps/real": -1611.9033203125, |
|
"loss": 0.0215, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.9647440910339355, |
|
"rewards/margins": 9.016992568969727, |
|
"rewards/real": 3.052248477935791, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 7.227722772277227e-08, |
|
"logits/generated": 3.062971591949463, |
|
"logits/real": 2.652564525604248, |
|
"logps/generated": -722.2522583007812, |
|
"logps/real": -1378.610107421875, |
|
"loss": 0.022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.261225700378418, |
|
"rewards/margins": 9.063823699951172, |
|
"rewards/real": 2.802598476409912, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.157001414427156e-08, |
|
"logits/generated": 2.944784641265869, |
|
"logits/real": 2.626530408859253, |
|
"logps/generated": -630.6044311523438, |
|
"logps/real": -1555.120849609375, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.279501914978027, |
|
"rewards/margins": 8.238837242126465, |
|
"rewards/real": 2.9593350887298584, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 7.086280056577086e-08, |
|
"logits/generated": 2.974566698074341, |
|
"logits/real": 2.734873056411743, |
|
"logps/generated": -740.10498046875, |
|
"logps/real": -1393.660400390625, |
|
"loss": 0.026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.561972141265869, |
|
"rewards/margins": 9.23666000366211, |
|
"rewards/real": 2.6746881008148193, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 7.015558698727016e-08, |
|
"logits/generated": 3.162912130355835, |
|
"logits/real": 2.7691640853881836, |
|
"logps/generated": -637.5083618164062, |
|
"logps/real": -1494.177490234375, |
|
"loss": 0.0244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.555890083312988, |
|
"rewards/margins": 8.500304222106934, |
|
"rewards/real": 2.944413900375366, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 6.944837340876945e-08, |
|
"logits/generated": 2.8717501163482666, |
|
"logits/real": 2.6633965969085693, |
|
"logps/generated": -659.5353393554688, |
|
"logps/real": -1353.6895751953125, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.05059814453125, |
|
"rewards/margins": 8.580648422241211, |
|
"rewards/real": 2.530050754547119, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 6.874115983026874e-08, |
|
"logits/generated": 3.1894919872283936, |
|
"logits/real": 2.647244691848755, |
|
"logps/generated": -659.2657470703125, |
|
"logps/real": -1401.873779296875, |
|
"loss": 0.0149, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.985430717468262, |
|
"rewards/margins": 8.887848854064941, |
|
"rewards/real": 2.902418375015259, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 6.803394625176802e-08, |
|
"logits/generated": 3.2617828845977783, |
|
"logits/real": 2.715851306915283, |
|
"logps/generated": -682.93603515625, |
|
"logps/real": -1624.47509765625, |
|
"loss": 0.0211, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.639365196228027, |
|
"rewards/margins": 10.307806015014648, |
|
"rewards/real": 3.6684412956237793, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 6.732673267326733e-08, |
|
"logits/generated": 3.0423474311828613, |
|
"logits/real": 2.620255947113037, |
|
"logps/generated": -686.6878662109375, |
|
"logps/real": -1532.2369384765625, |
|
"loss": 0.0197, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.549188137054443, |
|
"rewards/margins": 10.09010124206543, |
|
"rewards/real": 3.5409133434295654, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 6.661951909476661e-08, |
|
"logits/generated": 3.0638270378112793, |
|
"logits/real": 2.641237258911133, |
|
"logps/generated": -684.035888671875, |
|
"logps/real": -1405.203369140625, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.292230606079102, |
|
"rewards/margins": 9.292798042297363, |
|
"rewards/real": 3.0005664825439453, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 6.591230551626592e-08, |
|
"logits/generated": 2.9729528427124023, |
|
"logits/real": 2.662184715270996, |
|
"logps/generated": -651.6361083984375, |
|
"logps/real": -1185.017578125, |
|
"loss": 0.0219, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.018342971801758, |
|
"rewards/margins": 8.436049461364746, |
|
"rewards/real": 2.41770601272583, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 6.52050919377652e-08, |
|
"logits/generated": 2.9936587810516357, |
|
"logits/real": 2.7459309101104736, |
|
"logps/generated": -617.2003784179688, |
|
"logps/real": -1374.72607421875, |
|
"loss": 0.0199, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.145440578460693, |
|
"rewards/margins": 9.338098526000977, |
|
"rewards/real": 3.192657470703125, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 6.449787835926449e-08, |
|
"logits/generated": 3.140695095062256, |
|
"logits/real": 2.72432541847229, |
|
"logps/generated": -712.6917724609375, |
|
"logps/real": -1558.529296875, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.1376166343688965, |
|
"rewards/margins": 10.716670036315918, |
|
"rewards/real": 3.5790531635284424, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 6.379066478076379e-08, |
|
"logits/generated": 3.162978172302246, |
|
"logits/real": 2.6974151134490967, |
|
"logps/generated": -722.5013427734375, |
|
"logps/real": -1497.13232421875, |
|
"loss": 0.0184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.2485809326171875, |
|
"rewards/margins": 10.17140007019043, |
|
"rewards/real": 2.922818660736084, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.308345120226308e-08, |
|
"logits/generated": 3.1272811889648438, |
|
"logits/real": 2.774167776107788, |
|
"logps/generated": -650.5689697265625, |
|
"logps/real": -1484.775634765625, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.5811662673950195, |
|
"rewards/margins": 10.269671440124512, |
|
"rewards/real": 3.6885063648223877, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 6.237623762376237e-08, |
|
"logits/generated": 3.171654462814331, |
|
"logits/real": 2.742036819458008, |
|
"logps/generated": -754.7232666015625, |
|
"logps/real": -1549.0023193359375, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.933679103851318, |
|
"rewards/margins": 11.404020309448242, |
|
"rewards/real": 3.4703421592712402, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 6.166902404526166e-08, |
|
"logits/generated": 3.1139299869537354, |
|
"logits/real": 2.8051939010620117, |
|
"logps/generated": -735.65087890625, |
|
"logps/real": -1367.9779052734375, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.616179466247559, |
|
"rewards/margins": 11.006625175476074, |
|
"rewards/real": 3.3904449939727783, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 6.096181046676096e-08, |
|
"logits/generated": 3.1485302448272705, |
|
"logits/real": 2.6787030696868896, |
|
"logps/generated": -647.2530517578125, |
|
"logps/real": -1589.8712158203125, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.846640110015869, |
|
"rewards/margins": 10.44523811340332, |
|
"rewards/real": 3.598597288131714, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 6.025459688826026e-08, |
|
"logits/generated": 3.155958890914917, |
|
"logits/real": 2.688934803009033, |
|
"logps/generated": -694.9849243164062, |
|
"logps/real": -1377.62548828125, |
|
"loss": 0.0185, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.8145751953125, |
|
"rewards/margins": 11.082307815551758, |
|
"rewards/real": 3.267733097076416, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 5.954738330975955e-08, |
|
"logits/generated": 2.951596736907959, |
|
"logits/real": 2.6604816913604736, |
|
"logps/generated": -621.7342529296875, |
|
"logps/real": -1276.5732421875, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -5.571743488311768, |
|
"rewards/margins": 8.480446815490723, |
|
"rewards/real": 2.908703327178955, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 5.8840169731258837e-08, |
|
"logits/generated": 2.979721784591675, |
|
"logits/real": 2.7603650093078613, |
|
"logps/generated": -680.7958984375, |
|
"logps/real": -1584.530029296875, |
|
"loss": 0.0171, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.498532295227051, |
|
"rewards/margins": 11.40682601928711, |
|
"rewards/real": 3.9082934856414795, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 5.8132956152758125e-08, |
|
"logits/generated": 3.0327863693237305, |
|
"logits/real": 2.7106642723083496, |
|
"logps/generated": -674.9633178710938, |
|
"logps/real": -1397.305419921875, |
|
"loss": 0.0199, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -6.23696756362915, |
|
"rewards/margins": 9.843786239624023, |
|
"rewards/real": 3.6068196296691895, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 5.742574257425742e-08, |
|
"logits/generated": 3.1310720443725586, |
|
"logits/real": 2.61232328414917, |
|
"logps/generated": -686.5684814453125, |
|
"logps/real": -1574.0673828125, |
|
"loss": 0.0111, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.091876983642578, |
|
"rewards/margins": 11.274415969848633, |
|
"rewards/real": 4.182539939880371, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5.671852899575672e-08, |
|
"logits/generated": 2.9953229427337646, |
|
"logits/real": 2.858652353286743, |
|
"logps/generated": -703.0581665039062, |
|
"logps/real": -1572.0938720703125, |
|
"loss": 0.0172, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.434505462646484, |
|
"rewards/margins": 11.167814254760742, |
|
"rewards/real": 3.733309268951416, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 5.601131541725601e-08, |
|
"logits/generated": 3.2020938396453857, |
|
"logits/real": 2.65535831451416, |
|
"logps/generated": -668.9046630859375, |
|
"logps/real": -1516.696533203125, |
|
"loss": 0.0179, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.276400566101074, |
|
"rewards/margins": 11.022329330444336, |
|
"rewards/real": 3.7459285259246826, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 5.5304101838755304e-08, |
|
"logits/generated": 3.178405523300171, |
|
"logits/real": 2.871461868286133, |
|
"logps/generated": -699.7593994140625, |
|
"logps/real": -1644.784423828125, |
|
"loss": 0.0139, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.41660213470459, |
|
"rewards/margins": 11.70053482055664, |
|
"rewards/real": 4.283932685852051, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 5.459688826025459e-08, |
|
"logits/generated": 3.105088472366333, |
|
"logits/real": 2.7389116287231445, |
|
"logps/generated": -686.2722778320312, |
|
"logps/real": -1577.5899658203125, |
|
"loss": 0.0145, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.21103572845459, |
|
"rewards/margins": 11.520840644836426, |
|
"rewards/real": 4.309804439544678, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 5.388967468175388e-08, |
|
"logits/generated": 3.111755132675171, |
|
"logits/real": 2.6383872032165527, |
|
"logps/generated": -740.4054565429688, |
|
"logps/real": -1518.056884765625, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.46458625793457, |
|
"rewards/margins": 12.396316528320312, |
|
"rewards/real": 3.9317307472229004, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 5.318246110325318e-08, |
|
"logits/generated": 3.1267387866973877, |
|
"logits/real": 2.761569023132324, |
|
"logps/generated": -690.6373291015625, |
|
"logps/real": -1433.797119140625, |
|
"loss": 0.0127, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.513435363769531, |
|
"rewards/margins": 11.60542106628418, |
|
"rewards/real": 4.091986179351807, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 5.2475247524752476e-08, |
|
"logits/generated": 3.094365358352661, |
|
"logits/real": 2.7339248657226562, |
|
"logps/generated": -677.4999389648438, |
|
"logps/real": -1461.193603515625, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.2015275955200195, |
|
"rewards/margins": 10.88963508605957, |
|
"rewards/real": 3.68810772895813, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 5.1768033946251764e-08, |
|
"logits/generated": 3.1627821922302246, |
|
"logits/real": 2.6869568824768066, |
|
"logps/generated": -708.0393676757812, |
|
"logps/real": -1620.342529296875, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.853402614593506, |
|
"rewards/margins": 11.502897262573242, |
|
"rewards/real": 4.649496078491211, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 5.106082036775106e-08, |
|
"logits/generated": 2.925158977508545, |
|
"logits/real": 2.806152820587158, |
|
"logps/generated": -670.198486328125, |
|
"logps/real": -1649.2200927734375, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.177328586578369, |
|
"rewards/margins": 11.58249568939209, |
|
"rewards/real": 4.405167102813721, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 5.035360678925035e-08, |
|
"logits/generated": 3.0794711112976074, |
|
"logits/real": 2.7551910877227783, |
|
"logps/generated": -688.251708984375, |
|
"logps/real": -1551.3802490234375, |
|
"loss": 0.0092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.524941921234131, |
|
"rewards/margins": 11.831631660461426, |
|
"rewards/real": 4.306689262390137, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 4.964639321074964e-08, |
|
"logits/generated": 3.2023205757141113, |
|
"logits/real": 2.7396531105041504, |
|
"logps/generated": -726.21484375, |
|
"logps/real": -1432.2078857421875, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.621014595031738, |
|
"rewards/margins": 12.233930587768555, |
|
"rewards/real": 3.6129164695739746, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 4.8939179632248937e-08, |
|
"logits/generated": 3.036933422088623, |
|
"logits/real": 2.7342514991760254, |
|
"logps/generated": -679.8505859375, |
|
"logps/real": -1413.323974609375, |
|
"loss": 0.0156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.424284934997559, |
|
"rewards/margins": 11.5066499710083, |
|
"rewards/real": 4.082363605499268, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 4.823196605374823e-08, |
|
"logits/generated": 3.1470329761505127, |
|
"logits/real": 2.7851274013519287, |
|
"logps/generated": -707.0967407226562, |
|
"logps/real": -1301.5491943359375, |
|
"loss": 0.0129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.919151306152344, |
|
"rewards/margins": 11.449126243591309, |
|
"rewards/real": 3.529975175857544, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 4.752475247524752e-08, |
|
"logits/generated": 3.122591018676758, |
|
"logits/real": 2.899055242538452, |
|
"logps/generated": -719.7399291992188, |
|
"logps/real": -1345.24267578125, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.44778060913086, |
|
"rewards/margins": 12.348614692687988, |
|
"rewards/real": 3.9008331298828125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 4.6817538896746814e-08, |
|
"logits/generated": 3.1257503032684326, |
|
"logits/real": 2.7537941932678223, |
|
"logps/generated": -675.872314453125, |
|
"logps/real": -1462.6578369140625, |
|
"loss": 0.0083, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.721535682678223, |
|
"rewards/margins": 11.623268127441406, |
|
"rewards/real": 3.901732921600342, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 4.611032531824611e-08, |
|
"logits/generated": 3.1477560997009277, |
|
"logits/real": 2.8998966217041016, |
|
"logps/generated": -659.6651611328125, |
|
"logps/real": -1254.7550048828125, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.890495300292969, |
|
"rewards/margins": 10.485551834106445, |
|
"rewards/real": 3.595057725906372, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 4.5403111739745404e-08, |
|
"logits/generated": 3.0974771976470947, |
|
"logits/real": 2.7432289123535156, |
|
"logps/generated": -679.0161743164062, |
|
"logps/real": -1514.939453125, |
|
"loss": 0.0153, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -8.148828506469727, |
|
"rewards/margins": 12.779142379760742, |
|
"rewards/real": 4.630312919616699, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.469589816124469e-08, |
|
"logits/generated": 3.1638052463531494, |
|
"logits/real": 2.8109536170959473, |
|
"logps/generated": -698.923828125, |
|
"logps/real": -1472.047119140625, |
|
"loss": 0.012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.732884407043457, |
|
"rewards/margins": 11.864561080932617, |
|
"rewards/real": 4.13167667388916, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 4.3988684582743986e-08, |
|
"logits/generated": 3.1099958419799805, |
|
"logits/real": 2.689439296722412, |
|
"logps/generated": -644.0321044921875, |
|
"logps/real": -1493.985107421875, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.19980001449585, |
|
"rewards/margins": 11.551348686218262, |
|
"rewards/real": 4.351548194885254, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 4.328147100424328e-08, |
|
"logits/generated": 3.068477153778076, |
|
"logits/real": 2.7735862731933594, |
|
"logps/generated": -691.45166015625, |
|
"logps/real": -1532.789306640625, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.632587432861328, |
|
"rewards/margins": 12.373538970947266, |
|
"rewards/real": 4.740950584411621, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 4.257425742574257e-08, |
|
"logits/generated": 3.0876381397247314, |
|
"logits/real": 2.819197177886963, |
|
"logps/generated": -723.0416259765625, |
|
"logps/real": -1523.8883056640625, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.482768058776855, |
|
"rewards/margins": 12.97950553894043, |
|
"rewards/real": 4.496739387512207, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 4.186704384724187e-08, |
|
"logits/generated": 3.1232168674468994, |
|
"logits/real": 2.7532286643981934, |
|
"logps/generated": -705.3154296875, |
|
"logps/real": -1505.3544921875, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.721161842346191, |
|
"rewards/margins": 14.407315254211426, |
|
"rewards/real": 4.686154365539551, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 4.115983026874116e-08, |
|
"logits/generated": 3.03477144241333, |
|
"logits/real": 2.8783843517303467, |
|
"logps/generated": -643.8148803710938, |
|
"logps/real": -1341.3072509765625, |
|
"loss": 0.0125, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.113399505615234, |
|
"rewards/margins": 12.22516918182373, |
|
"rewards/real": 4.111769199371338, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 4.045261669024045e-08, |
|
"logits/generated": 3.1741697788238525, |
|
"logits/real": 2.8709182739257812, |
|
"logps/generated": -732.4049682617188, |
|
"logps/real": -1317.285400390625, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.147187232971191, |
|
"rewards/margins": 12.971673965454102, |
|
"rewards/real": 3.8244857788085938, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 3.974540311173974e-08, |
|
"logits/generated": 3.0487265586853027, |
|
"logits/real": 2.70530366897583, |
|
"logps/generated": -710.79931640625, |
|
"logps/real": -1459.953125, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.931787490844727, |
|
"rewards/margins": 12.98430347442627, |
|
"rewards/real": 4.052516460418701, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 3.9038189533239036e-08, |
|
"logits/generated": 3.2147929668426514, |
|
"logits/real": 2.8529582023620605, |
|
"logps/generated": -705.3197021484375, |
|
"logps/real": -1510.0345458984375, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.200128555297852, |
|
"rewards/margins": 13.442059516906738, |
|
"rewards/real": 4.241931438446045, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.833097595473833e-08, |
|
"logits/generated": 3.0952019691467285, |
|
"logits/real": 2.843813419342041, |
|
"logps/generated": -767.7200317382812, |
|
"logps/real": -1536.8294677734375, |
|
"loss": 0.007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.248136520385742, |
|
"rewards/margins": 14.309036254882812, |
|
"rewards/real": 5.06089973449707, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 3.762376237623762e-08, |
|
"logits/generated": 3.064669609069824, |
|
"logits/real": 2.8759753704071045, |
|
"logps/generated": -747.2686767578125, |
|
"logps/real": -1288.828857421875, |
|
"loss": 0.011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.522977828979492, |
|
"rewards/margins": 13.202028274536133, |
|
"rewards/real": 3.679051637649536, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.6916548797736914e-08, |
|
"logits/generated": 3.093175172805786, |
|
"logits/real": 2.7089123725891113, |
|
"logps/generated": -727.3369750976562, |
|
"logps/real": -1490.734619140625, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.388141632080078, |
|
"rewards/margins": 14.153898239135742, |
|
"rewards/real": 4.765757083892822, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 3.620933521923621e-08, |
|
"logits/generated": 3.0252323150634766, |
|
"logits/real": 2.6994900703430176, |
|
"logps/generated": -706.836181640625, |
|
"logps/real": -1605.043701171875, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.568842887878418, |
|
"rewards/margins": 13.715721130371094, |
|
"rewards/real": 5.146877288818359, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 3.55021216407355e-08, |
|
"logits/generated": 3.1279215812683105, |
|
"logits/real": 2.813671588897705, |
|
"logps/generated": -725.0180053710938, |
|
"logps/real": -1311.329833984375, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.17328929901123, |
|
"rewards/margins": 13.425753593444824, |
|
"rewards/real": 4.252464294433594, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 3.47949080622348e-08, |
|
"logits/generated": 3.1973278522491455, |
|
"logits/real": 2.841034412384033, |
|
"logps/generated": -739.7282104492188, |
|
"logps/real": -1421.683349609375, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.601125717163086, |
|
"rewards/margins": 13.759020805358887, |
|
"rewards/real": 4.157895088195801, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 3.4087694483734086e-08, |
|
"logits/generated": 3.14319109916687, |
|
"logits/real": 2.650394916534424, |
|
"logps/generated": -694.9728393554688, |
|
"logps/real": -1557.013671875, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.979018211364746, |
|
"rewards/margins": 14.022770881652832, |
|
"rewards/real": 5.043752193450928, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 3.3380480905233374e-08, |
|
"logits/generated": 3.283674716949463, |
|
"logits/real": 2.774038791656494, |
|
"logps/generated": -750.1487426757812, |
|
"logps/real": -1579.89697265625, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.157815933227539, |
|
"rewards/margins": 14.974313735961914, |
|
"rewards/real": 4.816495895385742, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 3.2673267326732676e-08, |
|
"logits/generated": 3.134382963180542, |
|
"logits/real": 2.8532094955444336, |
|
"logps/generated": -750.8877563476562, |
|
"logps/real": -1525.961669921875, |
|
"loss": 0.0077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.825960159301758, |
|
"rewards/margins": 14.588778495788574, |
|
"rewards/real": 4.762818336486816, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 3.1966053748231964e-08, |
|
"logits/generated": 3.130510091781616, |
|
"logits/real": 2.9025044441223145, |
|
"logps/generated": -675.0423583984375, |
|
"logps/real": -1538.1998291015625, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.219243049621582, |
|
"rewards/margins": 14.35087776184082, |
|
"rewards/real": 5.131636619567871, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 3.125884016973126e-08, |
|
"logits/generated": 3.234394073486328, |
|
"logits/real": 2.7886247634887695, |
|
"logps/generated": -688.5629272460938, |
|
"logps/real": -1481.4114990234375, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.884227752685547, |
|
"rewards/margins": 13.749975204467773, |
|
"rewards/real": 4.865747928619385, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 3.0551626591230553e-08, |
|
"logits/generated": 3.151052474975586, |
|
"logits/real": 2.8268675804138184, |
|
"logps/generated": -702.4296264648438, |
|
"logps/real": -1545.9931640625, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.792219161987305, |
|
"rewards/margins": 13.802957534790039, |
|
"rewards/real": 5.010737419128418, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 2.984441301272984e-08, |
|
"logits/generated": 3.0674405097961426, |
|
"logits/real": 2.791660785675049, |
|
"logps/generated": -728.8613891601562, |
|
"logps/real": -1402.8472900390625, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.552156448364258, |
|
"rewards/margins": 13.963113784790039, |
|
"rewards/real": 4.410956382751465, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 2.9137199434229136e-08, |
|
"logits/generated": 3.085538864135742, |
|
"logits/real": 2.8696396350860596, |
|
"logps/generated": -699.1375732421875, |
|
"logps/real": -1528.8197021484375, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.45177173614502, |
|
"rewards/margins": 14.307217597961426, |
|
"rewards/real": 4.8554463386535645, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 2.8429985855728428e-08, |
|
"logits/generated": 3.1769864559173584, |
|
"logits/real": 2.864468574523926, |
|
"logps/generated": -730.0369262695312, |
|
"logps/real": -1405.126220703125, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.242330551147461, |
|
"rewards/margins": 14.709861755371094, |
|
"rewards/real": 4.467529296875, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.7722772277227722e-08, |
|
"logits/generated": 3.0584521293640137, |
|
"logits/real": 2.7300102710723877, |
|
"logps/generated": -722.6023559570312, |
|
"logps/real": -1533.9681396484375, |
|
"loss": 0.0089, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.0105562210083, |
|
"rewards/margins": 14.953516960144043, |
|
"rewards/real": 4.942961692810059, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 2.7015558698727014e-08, |
|
"logits/generated": 3.1814303398132324, |
|
"logits/real": 2.767988681793213, |
|
"logps/generated": -705.47021484375, |
|
"logps/real": -1517.984619140625, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.146055221557617, |
|
"rewards/margins": 15.356274604797363, |
|
"rewards/real": 5.210217475891113, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 2.6308345120226305e-08, |
|
"logits/generated": 3.2221198081970215, |
|
"logits/real": 2.8411731719970703, |
|
"logps/generated": -727.7474365234375, |
|
"logps/real": -1452.932373046875, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.274625778198242, |
|
"rewards/margins": 15.288644790649414, |
|
"rewards/real": 5.014019012451172, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 2.56011315417256e-08, |
|
"logits/generated": 2.9806618690490723, |
|
"logits/real": 2.761728286743164, |
|
"logps/generated": -739.073974609375, |
|
"logps/real": -1438.574951171875, |
|
"loss": 0.0054, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.485044479370117, |
|
"rewards/margins": 15.389866828918457, |
|
"rewards/real": 4.904819965362549, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 2.489391796322489e-08, |
|
"logits/generated": 3.0833356380462646, |
|
"logits/real": 2.8559961318969727, |
|
"logps/generated": -737.3714599609375, |
|
"logps/real": -1319.4708251953125, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -9.90731430053711, |
|
"rewards/margins": 14.22021484375, |
|
"rewards/real": 4.312900066375732, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 2.4186704384724186e-08, |
|
"logits/generated": 3.1098473072052, |
|
"logits/real": 2.8987298011779785, |
|
"logps/generated": -751.8609619140625, |
|
"logps/real": -1390.7666015625, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.742658615112305, |
|
"rewards/margins": 15.709897994995117, |
|
"rewards/real": 4.967240810394287, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 2.3479490806223478e-08, |
|
"logits/generated": 3.181952476501465, |
|
"logits/real": 2.8264319896698, |
|
"logps/generated": -725.786865234375, |
|
"logps/real": -1668.9781494140625, |
|
"loss": 0.0057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.877522468566895, |
|
"rewards/margins": 15.596285820007324, |
|
"rewards/real": 5.7187628746032715, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 2.2772277227722772e-08, |
|
"logits/generated": 3.2247843742370605, |
|
"logits/real": 2.787977695465088, |
|
"logps/generated": -689.7197265625, |
|
"logps/real": -1527.9981689453125, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.805652618408203, |
|
"rewards/margins": 14.870402336120605, |
|
"rewards/real": 5.064749240875244, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.2065063649222067e-08, |
|
"logits/generated": 3.1002349853515625, |
|
"logits/real": 2.8746185302734375, |
|
"logps/generated": -755.68994140625, |
|
"logps/real": -1484.486328125, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.283646583557129, |
|
"rewards/margins": 14.29266357421875, |
|
"rewards/real": 5.009018421173096, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 2.1357850070721355e-08, |
|
"logits/generated": 3.172830104827881, |
|
"logits/real": 2.891188859939575, |
|
"logps/generated": -711.9218139648438, |
|
"logps/real": -1525.380615234375, |
|
"loss": 0.0075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.644311904907227, |
|
"rewards/margins": 14.991864204406738, |
|
"rewards/real": 5.3475518226623535, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 2.065063649222065e-08, |
|
"logits/generated": 3.162585735321045, |
|
"logits/real": 2.814383029937744, |
|
"logps/generated": -691.09375, |
|
"logps/real": -1261.48046875, |
|
"loss": 0.0063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.76778793334961, |
|
"rewards/margins": 13.0126953125, |
|
"rewards/real": 4.244906425476074, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 1.994342291371994e-08, |
|
"logits/generated": 3.1490421295166016, |
|
"logits/real": 2.8507959842681885, |
|
"logps/generated": -685.4339599609375, |
|
"logps/real": -1518.389404296875, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.577953338623047, |
|
"rewards/margins": 14.928466796875, |
|
"rewards/real": 5.350512504577637, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 1.9236209335219236e-08, |
|
"logits/generated": 3.1204447746276855, |
|
"logits/real": 2.761258125305176, |
|
"logps/generated": -718.8822021484375, |
|
"logps/real": -1507.6552734375, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.944616317749023, |
|
"rewards/margins": 15.193156242370605, |
|
"rewards/real": 4.248539447784424, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.8528995756718528e-08, |
|
"logits/generated": 3.202990770339966, |
|
"logits/real": 2.789196014404297, |
|
"logps/generated": -736.8391723632812, |
|
"logps/real": -1285.6285400390625, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.708449363708496, |
|
"rewards/margins": 14.332748413085938, |
|
"rewards/real": 4.624300003051758, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1.782178217821782e-08, |
|
"logits/generated": 3.044565200805664, |
|
"logits/real": 2.813560724258423, |
|
"logps/generated": -749.41845703125, |
|
"logps/real": -1450.473876953125, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.962289810180664, |
|
"rewards/margins": 15.018086433410645, |
|
"rewards/real": 5.055794715881348, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 1.7114568599717114e-08, |
|
"logits/generated": 3.1547980308532715, |
|
"logits/real": 2.8710808753967285, |
|
"logps/generated": -685.3002319335938, |
|
"logps/real": -1505.046875, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.081647872924805, |
|
"rewards/margins": 15.215327262878418, |
|
"rewards/real": 5.133678913116455, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 1.640735502121641e-08, |
|
"logits/generated": 3.111386299133301, |
|
"logits/real": 2.878361701965332, |
|
"logps/generated": -712.4747924804688, |
|
"logps/real": -1361.859375, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.593215942382812, |
|
"rewards/margins": 13.842330932617188, |
|
"rewards/real": 4.249114990234375, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 1.57001414427157e-08, |
|
"logits/generated": 3.1498868465423584, |
|
"logits/real": 2.785670518875122, |
|
"logps/generated": -749.1596069335938, |
|
"logps/real": -1366.0186767578125, |
|
"loss": 0.0041, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.603869438171387, |
|
"rewards/margins": 15.060259819030762, |
|
"rewards/real": 4.45639181137085, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 1.499292786421499e-08, |
|
"logits/generated": 3.2311694622039795, |
|
"logits/real": 2.8071227073669434, |
|
"logps/generated": -698.42333984375, |
|
"logps/real": -1556.6697998046875, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.801340103149414, |
|
"rewards/margins": 16.656749725341797, |
|
"rewards/real": 5.855408668518066, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 1.4285714285714284e-08, |
|
"logits/generated": 3.204010486602783, |
|
"logits/real": 2.8270351886749268, |
|
"logps/generated": -768.1214599609375, |
|
"logps/real": -1522.0247802734375, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.407632827758789, |
|
"rewards/margins": 16.46245574951172, |
|
"rewards/real": 5.054823875427246, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 1.3578500707213577e-08, |
|
"logits/generated": 3.2223219871520996, |
|
"logits/real": 2.748478412628174, |
|
"logps/generated": -709.1806640625, |
|
"logps/real": -1449.1051025390625, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.24437427520752, |
|
"rewards/margins": 15.332929611206055, |
|
"rewards/real": 5.088555335998535, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 1.287128712871287e-08, |
|
"logits/generated": 2.949439525604248, |
|
"logits/real": 2.8462400436401367, |
|
"logps/generated": -702.388427734375, |
|
"logps/real": -1221.220458984375, |
|
"loss": 0.0065, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.625406265258789, |
|
"rewards/margins": 13.894259452819824, |
|
"rewards/real": 4.268852710723877, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 1.2164073550212164e-08, |
|
"logits/generated": 3.1298446655273438, |
|
"logits/real": 2.8281898498535156, |
|
"logps/generated": -750.8180541992188, |
|
"logps/real": -1293.175048828125, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.595232009887695, |
|
"rewards/margins": 15.49177360534668, |
|
"rewards/real": 4.896543025970459, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 1.1456859971711457e-08, |
|
"logits/generated": 3.046416759490967, |
|
"logits/real": 2.7686657905578613, |
|
"logps/generated": -703.9090576171875, |
|
"logps/real": -1550.712158203125, |
|
"loss": 0.005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.808374404907227, |
|
"rewards/margins": 15.086359024047852, |
|
"rewards/real": 5.277985572814941, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 1.074964639321075e-08, |
|
"logits/generated": 3.1554598808288574, |
|
"logits/real": 2.8488306999206543, |
|
"logps/generated": -740.9146118164062, |
|
"logps/real": -1494.5152587890625, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.964345932006836, |
|
"rewards/margins": 16.294296264648438, |
|
"rewards/real": 5.329949378967285, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 1.0042432814710041e-08, |
|
"logits/generated": 3.15816330909729, |
|
"logits/real": 2.857891082763672, |
|
"logps/generated": -700.6273803710938, |
|
"logps/real": -1395.680419921875, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.415566444396973, |
|
"rewards/margins": 14.575197219848633, |
|
"rewards/real": 4.15963077545166, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 9.335219236209336e-09, |
|
"logits/generated": 3.150120496749878, |
|
"logits/real": 2.801614761352539, |
|
"logps/generated": -702.9065551757812, |
|
"logps/real": -1336.076904296875, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.592188835144043, |
|
"rewards/margins": 15.421045303344727, |
|
"rewards/real": 4.828855037689209, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 8.628005657708627e-09, |
|
"logits/generated": 3.075422525405884, |
|
"logits/real": 2.759566307067871, |
|
"logps/generated": -750.0064086914062, |
|
"logps/real": -1322.994140625, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.502080917358398, |
|
"rewards/margins": 16.186368942260742, |
|
"rewards/real": 4.68428897857666, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 7.92079207920792e-09, |
|
"logits/generated": 2.975106716156006, |
|
"logits/real": 2.8558712005615234, |
|
"logps/generated": -730.2501831054688, |
|
"logps/real": -1330.128173828125, |
|
"loss": 0.0043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.35800552368164, |
|
"rewards/margins": 14.964300155639648, |
|
"rewards/real": 4.60629415512085, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 7.2135785007072135e-09, |
|
"logits/generated": 3.338933229446411, |
|
"logits/real": 2.866070032119751, |
|
"logps/generated": -702.8323974609375, |
|
"logps/real": -1660.9990234375, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.770402908325195, |
|
"rewards/margins": 16.416067123413086, |
|
"rewards/real": 5.645665645599365, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 6.506364922206506e-09, |
|
"logits/generated": 3.193213701248169, |
|
"logits/real": 2.8589539527893066, |
|
"logps/generated": -730.3358154296875, |
|
"logps/real": -1374.8104248046875, |
|
"loss": 0.0053, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.911657333374023, |
|
"rewards/margins": 15.75550651550293, |
|
"rewards/real": 4.843849182128906, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 5.799151343705799e-09, |
|
"logits/generated": 3.0994668006896973, |
|
"logits/real": 2.7762365341186523, |
|
"logps/generated": -667.8460693359375, |
|
"logps/real": -1382.4453125, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.389671325683594, |
|
"rewards/margins": 14.397994995117188, |
|
"rewards/real": 5.008322238922119, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 5.091937765205092e-09, |
|
"logits/generated": 3.0917019844055176, |
|
"logits/real": 2.8576996326446533, |
|
"logps/generated": -709.4324340820312, |
|
"logps/real": -1651.778076171875, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.680952072143555, |
|
"rewards/margins": 16.81113052368164, |
|
"rewards/real": 6.130180358886719, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 4.384724186704385e-09, |
|
"logits/generated": 3.144845724105835, |
|
"logits/real": 2.8753390312194824, |
|
"logps/generated": -750.3989868164062, |
|
"logps/real": -1244.6375732421875, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.346524238586426, |
|
"rewards/margins": 16.04071807861328, |
|
"rewards/real": 4.694192409515381, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 3.6775106082036773e-09, |
|
"logits/generated": 3.173389196395874, |
|
"logits/real": 2.8000612258911133, |
|
"logps/generated": -695.5457763671875, |
|
"logps/real": -1390.158935546875, |
|
"loss": 0.0047, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.513983726501465, |
|
"rewards/margins": 14.579229354858398, |
|
"rewards/real": 5.065245151519775, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 2.97029702970297e-09, |
|
"logits/generated": 3.1202340126037598, |
|
"logits/real": 2.816042900085449, |
|
"logps/generated": -763.26806640625, |
|
"logps/real": -1385.561279296875, |
|
"loss": 0.0052, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.074748039245605, |
|
"rewards/margins": 16.243824005126953, |
|
"rewards/real": 5.169075965881348, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 2.263083451202263e-09, |
|
"logits/generated": 3.286569118499756, |
|
"logits/real": 2.9330081939697266, |
|
"logps/generated": -684.3175659179688, |
|
"logps/real": -1652.4827880859375, |
|
"loss": 0.0044, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.155891418457031, |
|
"rewards/margins": 15.940603256225586, |
|
"rewards/real": 5.784712314605713, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 1.5558698727015557e-09, |
|
"logits/generated": 3.17760968208313, |
|
"logits/real": 2.854255199432373, |
|
"logps/generated": -746.004150390625, |
|
"logps/real": -1345.9742431640625, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.761791229248047, |
|
"rewards/margins": 16.628210067749023, |
|
"rewards/real": 4.86641788482666, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 8.486562942008486e-10, |
|
"logits/generated": 3.056246280670166, |
|
"logits/real": 2.8659956455230713, |
|
"logps/generated": -775.7562255859375, |
|
"logps/real": -1462.85009765625, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.534282684326172, |
|
"rewards/margins": 16.907535552978516, |
|
"rewards/real": 5.3732523918151855, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 1.4144271570014144e-10, |
|
"logits/generated": 3.131124496459961, |
|
"logits/real": 2.8286118507385254, |
|
"logps/generated": -742.1873779296875, |
|
"logps/real": -1465.8543701171875, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.972893714904785, |
|
"rewards/margins": 16.086231231689453, |
|
"rewards/real": 5.113335609436035, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"step": 1572, |
|
"total_flos": 0.0, |
|
"train_loss": 0.057728804025485636, |
|
"train_runtime": 21391.1697, |
|
"train_samples_per_second": 4.712, |
|
"train_steps_per_second": 0.073 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1572, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|