Llama-3.2-1B-SPIN-iter2 / trainer_state.json
DavieLion's picture
Upload 11 files
ee81e5a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.988571428571428,
"eval_steps": 100,
"global_step": 1572,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 6.329113924050632e-10,
"logits/generated": 3.334580421447754,
"logits/real": 2.520763397216797,
"logps/generated": -653.1103515625,
"logps/real": -1322.83154296875,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.04,
"learning_rate": 6.329113924050633e-09,
"logits/generated": 2.984175682067871,
"logits/real": 2.4933767318725586,
"logps/generated": -569.99365234375,
"logps/real": -1401.09375,
"loss": 0.6926,
"rewards/accuracies": 0.4027777910232544,
"rewards/generated": 0.001990392105653882,
"rewards/margins": 0.003700366010889411,
"rewards/real": 0.005690759979188442,
"step": 10
},
{
"epoch": 0.08,
"learning_rate": 1.2658227848101265e-08,
"logits/generated": 3.1899852752685547,
"logits/real": 2.5960335731506348,
"logps/generated": -639.3368530273438,
"logps/real": -1633.5921630859375,
"loss": 0.6864,
"rewards/accuracies": 0.5375000238418579,
"rewards/generated": -0.03260010853409767,
"rewards/margins": 0.02890392579138279,
"rewards/real": -0.0036961850710213184,
"step": 20
},
{
"epoch": 0.11,
"learning_rate": 1.89873417721519e-08,
"logits/generated": 3.1001484394073486,
"logits/real": 2.5820600986480713,
"logps/generated": -625.9615478515625,
"logps/real": -1550.431884765625,
"loss": 0.6632,
"rewards/accuracies": 0.6875,
"rewards/generated": -0.0366821363568306,
"rewards/margins": 0.0638287365436554,
"rewards/real": 0.0271465964615345,
"step": 30
},
{
"epoch": 0.15,
"learning_rate": 2.531645569620253e-08,
"logits/generated": 2.8814804553985596,
"logits/real": 2.6468660831451416,
"logps/generated": -592.5699462890625,
"logps/real": -1527.6875,
"loss": 0.6267,
"rewards/accuracies": 0.762499988079071,
"rewards/generated": -0.08724664151668549,
"rewards/margins": 0.15261340141296387,
"rewards/real": 0.06536674499511719,
"step": 40
},
{
"epoch": 0.19,
"learning_rate": 3.1645569620253166e-08,
"logits/generated": 3.147773265838623,
"logits/real": 2.535146713256836,
"logps/generated": -688.0377807617188,
"logps/real": -1441.314453125,
"loss": 0.5423,
"rewards/accuracies": 0.887499988079071,
"rewards/generated": -0.17641791701316833,
"rewards/margins": 0.3502033054828644,
"rewards/real": 0.17378540337085724,
"step": 50
},
{
"epoch": 0.23,
"learning_rate": 3.79746835443038e-08,
"logits/generated": 3.1963257789611816,
"logits/real": 2.5462851524353027,
"logps/generated": -594.2725219726562,
"logps/real": -1453.6497802734375,
"loss": 0.5062,
"rewards/accuracies": 0.8999999761581421,
"rewards/generated": -0.27025485038757324,
"rewards/margins": 0.4667099416255951,
"rewards/real": 0.19645504653453827,
"step": 60
},
{
"epoch": 0.27,
"learning_rate": 4.430379746835442e-08,
"logits/generated": 2.942413806915283,
"logits/real": 2.5651955604553223,
"logps/generated": -623.3458251953125,
"logps/real": -1351.8724365234375,
"loss": 0.4366,
"rewards/accuracies": 0.949999988079071,
"rewards/generated": -0.48640790581703186,
"rewards/margins": 0.7007172703742981,
"rewards/real": 0.21430937945842743,
"step": 70
},
{
"epoch": 0.3,
"learning_rate": 5.063291139240506e-08,
"logits/generated": 3.1414570808410645,
"logits/real": 2.5161662101745605,
"logps/generated": -623.8413696289062,
"logps/real": -1677.4615478515625,
"loss": 0.3137,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -0.7329307794570923,
"rewards/margins": 1.239257574081421,
"rewards/real": 0.5063267350196838,
"step": 80
},
{
"epoch": 0.34,
"learning_rate": 5.69620253164557e-08,
"logits/generated": 3.070830821990967,
"logits/real": 2.5541062355041504,
"logps/generated": -657.6233520507812,
"logps/real": -1626.2838134765625,
"loss": 0.2947,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.0010322332382202,
"rewards/margins": 1.5189402103424072,
"rewards/real": 0.517907977104187,
"step": 90
},
{
"epoch": 0.38,
"learning_rate": 6.329113924050633e-08,
"logits/generated": 2.870387315750122,
"logits/real": 2.7046616077423096,
"logps/generated": -579.2174072265625,
"logps/real": -1487.188232421875,
"loss": 0.2589,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.0812404155731201,
"rewards/margins": 1.698075532913208,
"rewards/real": 0.6168351173400879,
"step": 100
},
{
"epoch": 0.42,
"learning_rate": 6.962025316455696e-08,
"logits/generated": 2.9290151596069336,
"logits/real": 2.728487014770508,
"logps/generated": -648.2110595703125,
"logps/real": -1627.390869140625,
"loss": 0.2453,
"rewards/accuracies": 1.0,
"rewards/generated": -1.244185209274292,
"rewards/margins": 1.9264612197875977,
"rewards/real": 0.6822759509086609,
"step": 110
},
{
"epoch": 0.46,
"learning_rate": 7.59493670886076e-08,
"logits/generated": 2.9588751792907715,
"logits/real": 2.5211470127105713,
"logps/generated": -652.1360473632812,
"logps/real": -1250.042236328125,
"loss": 0.2277,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -1.391522765159607,
"rewards/margins": 1.8222811222076416,
"rewards/real": 0.43075838685035706,
"step": 120
},
{
"epoch": 0.5,
"learning_rate": 8.227848101265823e-08,
"logits/generated": 3.0586698055267334,
"logits/real": 2.4790332317352295,
"logps/generated": -611.0165405273438,
"logps/real": -1484.681884765625,
"loss": 0.188,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -1.7131977081298828,
"rewards/margins": 2.333926200866699,
"rewards/real": 0.6207287907600403,
"step": 130
},
{
"epoch": 0.53,
"learning_rate": 8.860759493670885e-08,
"logits/generated": 2.900022506713867,
"logits/real": 2.670552968978882,
"logps/generated": -655.8267822265625,
"logps/real": -1645.905029296875,
"loss": 0.1467,
"rewards/accuracies": 1.0,
"rewards/generated": -2.051678419113159,
"rewards/margins": 2.9112563133239746,
"rewards/real": 0.8595778346061707,
"step": 140
},
{
"epoch": 0.57,
"learning_rate": 9.493670886075948e-08,
"logits/generated": 3.083251476287842,
"logits/real": 2.547938108444214,
"logps/generated": -647.1588134765625,
"logps/real": -1623.0638427734375,
"loss": 0.122,
"rewards/accuracies": 1.0,
"rewards/generated": -2.5622642040252686,
"rewards/margins": 3.50146746635437,
"rewards/real": 0.9392032623291016,
"step": 150
},
{
"epoch": 0.61,
"learning_rate": 9.985855728429985e-08,
"logits/generated": 3.189711809158325,
"logits/real": 2.609210729598999,
"logps/generated": -673.2716064453125,
"logps/real": -1431.1510009765625,
"loss": 0.1103,
"rewards/accuracies": 1.0,
"rewards/generated": -2.6634578704833984,
"rewards/margins": 3.5925452709198,
"rewards/real": 0.9290875196456909,
"step": 160
},
{
"epoch": 0.65,
"learning_rate": 9.915134370579915e-08,
"logits/generated": 2.796431064605713,
"logits/real": 2.6858787536621094,
"logps/generated": -623.2662353515625,
"logps/real": -1411.96533203125,
"loss": 0.1011,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -2.4235339164733887,
"rewards/margins": 3.487780809402466,
"rewards/real": 1.0642468929290771,
"step": 170
},
{
"epoch": 0.69,
"learning_rate": 9.844413012729844e-08,
"logits/generated": 2.992323875427246,
"logits/real": 2.6532320976257324,
"logps/generated": -626.7801513671875,
"logps/real": -1628.442626953125,
"loss": 0.1009,
"rewards/accuracies": 1.0,
"rewards/generated": -2.747467517852783,
"rewards/margins": 4.025119781494141,
"rewards/real": 1.2776525020599365,
"step": 180
},
{
"epoch": 0.72,
"learning_rate": 9.773691654879774e-08,
"logits/generated": 3.0291736125946045,
"logits/real": 2.5113823413848877,
"logps/generated": -576.511474609375,
"logps/real": -1578.1036376953125,
"loss": 0.0983,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -2.7749691009521484,
"rewards/margins": 3.916727066040039,
"rewards/real": 1.1417579650878906,
"step": 190
},
{
"epoch": 0.76,
"learning_rate": 9.702970297029703e-08,
"logits/generated": 2.8259243965148926,
"logits/real": 2.727518081665039,
"logps/generated": -609.0852661132812,
"logps/real": -1438.0333251953125,
"loss": 0.0846,
"rewards/accuracies": 1.0,
"rewards/generated": -2.6536800861358643,
"rewards/margins": 3.9251868724823,
"rewards/real": 1.2715070247650146,
"step": 200
},
{
"epoch": 0.8,
"learning_rate": 9.632248939179631e-08,
"logits/generated": 3.0957698822021484,
"logits/real": 2.5199692249298096,
"logps/generated": -674.4405517578125,
"logps/real": -1574.9976806640625,
"loss": 0.0784,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.413438320159912,
"rewards/margins": 4.9110894203186035,
"rewards/real": 1.497650384902954,
"step": 210
},
{
"epoch": 0.84,
"learning_rate": 9.561527581329562e-08,
"logits/generated": 3.059602737426758,
"logits/real": 2.611131429672241,
"logps/generated": -657.6616821289062,
"logps/real": -1342.2515869140625,
"loss": 0.0751,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.3509624004364014,
"rewards/margins": 4.484201908111572,
"rewards/real": 1.1332390308380127,
"step": 220
},
{
"epoch": 0.88,
"learning_rate": 9.49080622347949e-08,
"logits/generated": 2.9535973072052,
"logits/real": 2.695953130722046,
"logps/generated": -623.5858764648438,
"logps/real": -1357.4754638671875,
"loss": 0.0752,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.0820462703704834,
"rewards/margins": 4.444340705871582,
"rewards/real": 1.3622944355010986,
"step": 230
},
{
"epoch": 0.91,
"learning_rate": 9.420084865629419e-08,
"logits/generated": 3.0728561878204346,
"logits/real": 2.6514034271240234,
"logps/generated": -646.6805419921875,
"logps/real": -1661.2340087890625,
"loss": 0.074,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -3.4602718353271484,
"rewards/margins": 5.120414733886719,
"rewards/real": 1.6601425409317017,
"step": 240
},
{
"epoch": 0.95,
"learning_rate": 9.349363507779348e-08,
"logits/generated": 3.14707088470459,
"logits/real": 2.582815647125244,
"logps/generated": -659.8969116210938,
"logps/real": -1602.3460693359375,
"loss": 0.0605,
"rewards/accuracies": 1.0,
"rewards/generated": -3.8163421154022217,
"rewards/margins": 5.412659645080566,
"rewards/real": 1.5963174104690552,
"step": 250
},
{
"epoch": 0.99,
"learning_rate": 9.278642149929278e-08,
"logits/generated": 2.910804271697998,
"logits/real": 2.588214159011841,
"logps/generated": -618.7315673828125,
"logps/real": -1585.5712890625,
"loss": 0.0591,
"rewards/accuracies": 1.0,
"rewards/generated": -3.172882556915283,
"rewards/margins": 4.7417426109313965,
"rewards/real": 1.5688601732254028,
"step": 260
},
{
"epoch": 1.03,
"learning_rate": 9.207920792079208e-08,
"logits/generated": 3.0260536670684814,
"logits/real": 2.576744556427002,
"logps/generated": -712.7923583984375,
"logps/real": -1611.0130615234375,
"loss": 0.0658,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.007518768310547,
"rewards/margins": 5.749050617218018,
"rewards/real": 1.7415320873260498,
"step": 270
},
{
"epoch": 1.07,
"learning_rate": 9.137199434229137e-08,
"logits/generated": 3.0747456550598145,
"logits/real": 2.6565499305725098,
"logps/generated": -677.0684814453125,
"logps/real": -1437.9249267578125,
"loss": 0.0598,
"rewards/accuracies": 1.0,
"rewards/generated": -4.153753280639648,
"rewards/margins": 5.587630271911621,
"rewards/real": 1.4338771104812622,
"step": 280
},
{
"epoch": 1.1,
"learning_rate": 9.066478076379066e-08,
"logits/generated": 3.047402858734131,
"logits/real": 2.692274332046509,
"logps/generated": -652.3853759765625,
"logps/real": -1553.9385986328125,
"loss": 0.0612,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.262011528015137,
"rewards/margins": 5.818949222564697,
"rewards/real": 1.556937575340271,
"step": 290
},
{
"epoch": 1.14,
"learning_rate": 8.995756718528995e-08,
"logits/generated": 2.943486452102661,
"logits/real": 2.4795732498168945,
"logps/generated": -604.9609375,
"logps/real": -1512.994140625,
"loss": 0.0622,
"rewards/accuracies": 1.0,
"rewards/generated": -3.6027259826660156,
"rewards/margins": 5.401385307312012,
"rewards/real": 1.7986586093902588,
"step": 300
},
{
"epoch": 1.18,
"learning_rate": 8.925035360678924e-08,
"logits/generated": 2.987004518508911,
"logits/real": 2.6335082054138184,
"logps/generated": -626.880126953125,
"logps/real": -1727.589599609375,
"loss": 0.0537,
"rewards/accuracies": 1.0,
"rewards/generated": -3.473705768585205,
"rewards/margins": 5.608192443847656,
"rewards/real": 2.1344869136810303,
"step": 310
},
{
"epoch": 1.22,
"learning_rate": 8.854314002828854e-08,
"logits/generated": 3.1043455600738525,
"logits/real": 2.5045957565307617,
"logps/generated": -654.8699951171875,
"logps/real": -1668.8502197265625,
"loss": 0.056,
"rewards/accuracies": 1.0,
"rewards/generated": -3.9041481018066406,
"rewards/margins": 5.877499580383301,
"rewards/real": 1.9733517169952393,
"step": 320
},
{
"epoch": 1.26,
"learning_rate": 8.783592644978784e-08,
"logits/generated": 2.9959933757781982,
"logits/real": 2.833263874053955,
"logps/generated": -700.354248046875,
"logps/real": -1501.94775390625,
"loss": 0.0495,
"rewards/accuracies": 1.0,
"rewards/generated": -4.707437515258789,
"rewards/margins": 6.398648262023926,
"rewards/real": 1.691210389137268,
"step": 330
},
{
"epoch": 1.3,
"learning_rate": 8.712871287128713e-08,
"logits/generated": 3.1212868690490723,
"logits/real": 2.434352159500122,
"logps/generated": -631.5654296875,
"logps/real": -1499.0816650390625,
"loss": 0.056,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -3.8523108959198,
"rewards/margins": 5.5262956619262695,
"rewards/real": 1.6739847660064697,
"step": 340
},
{
"epoch": 1.33,
"learning_rate": 8.642149929278641e-08,
"logits/generated": 3.180377244949341,
"logits/real": 2.5462543964385986,
"logps/generated": -658.816162109375,
"logps/real": -1713.6314697265625,
"loss": 0.0449,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.1651411056518555,
"rewards/margins": 6.40631628036499,
"rewards/real": 2.241175651550293,
"step": 350
},
{
"epoch": 1.37,
"learning_rate": 8.57142857142857e-08,
"logits/generated": 3.1063506603240967,
"logits/real": 2.604684352874756,
"logps/generated": -630.5732421875,
"logps/real": -1476.998779296875,
"loss": 0.0508,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -4.010001182556152,
"rewards/margins": 5.754232406616211,
"rewards/real": 1.74423086643219,
"step": 360
},
{
"epoch": 1.41,
"learning_rate": 8.5007072135785e-08,
"logits/generated": 2.880650758743286,
"logits/real": 2.8358330726623535,
"logps/generated": -612.5302124023438,
"logps/real": -1495.0152587890625,
"loss": 0.0521,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -4.035798072814941,
"rewards/margins": 6.071022987365723,
"rewards/real": 2.035224676132202,
"step": 370
},
{
"epoch": 1.45,
"learning_rate": 8.429985855728429e-08,
"logits/generated": 2.8637542724609375,
"logits/real": 2.558077335357666,
"logps/generated": -667.6907958984375,
"logps/real": -1590.078857421875,
"loss": 0.0392,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.026558876037598,
"rewards/margins": 7.185013771057129,
"rewards/real": 2.158454656600952,
"step": 380
},
{
"epoch": 1.49,
"learning_rate": 8.35926449787836e-08,
"logits/generated": 3.141674518585205,
"logits/real": 2.5596821308135986,
"logps/generated": -645.9779663085938,
"logps/real": -1424.9947509765625,
"loss": 0.0396,
"rewards/accuracies": 1.0,
"rewards/generated": -4.634699821472168,
"rewards/margins": 6.322070121765137,
"rewards/real": 1.6873706579208374,
"step": 390
},
{
"epoch": 1.52,
"learning_rate": 8.288543140028288e-08,
"logits/generated": 3.265359878540039,
"logits/real": 2.5316691398620605,
"logps/generated": -671.8311767578125,
"logps/real": -1545.91796875,
"loss": 0.0326,
"rewards/accuracies": 1.0,
"rewards/generated": -5.4212493896484375,
"rewards/margins": 7.516656398773193,
"rewards/real": 2.0954079627990723,
"step": 400
},
{
"epoch": 1.56,
"learning_rate": 8.217821782178217e-08,
"logits/generated": 3.1239216327667236,
"logits/real": 2.669434070587158,
"logps/generated": -656.9334716796875,
"logps/real": -1509.529541015625,
"loss": 0.0422,
"rewards/accuracies": 1.0,
"rewards/generated": -4.773463249206543,
"rewards/margins": 6.696150779724121,
"rewards/real": 1.9226871728897095,
"step": 410
},
{
"epoch": 1.6,
"learning_rate": 8.147100424328147e-08,
"logits/generated": 3.169534683227539,
"logits/real": 2.637077808380127,
"logps/generated": -701.36767578125,
"logps/real": -1685.140380859375,
"loss": 0.0393,
"rewards/accuracies": 1.0,
"rewards/generated": -5.24563455581665,
"rewards/margins": 7.496856689453125,
"rewards/real": 2.2512223720550537,
"step": 420
},
{
"epoch": 1.64,
"learning_rate": 8.076379066478076e-08,
"logits/generated": 2.9224164485931396,
"logits/real": 2.562767505645752,
"logps/generated": -679.8922729492188,
"logps/real": -1469.2054443359375,
"loss": 0.0388,
"rewards/accuracies": 1.0,
"rewards/generated": -4.989293098449707,
"rewards/margins": 7.009693145751953,
"rewards/real": 2.020399570465088,
"step": 430
},
{
"epoch": 1.68,
"learning_rate": 8.005657708628005e-08,
"logits/generated": 2.9800190925598145,
"logits/real": 2.654110908508301,
"logps/generated": -697.5772705078125,
"logps/real": -1385.52099609375,
"loss": 0.0376,
"rewards/accuracies": 1.0,
"rewards/generated": -5.243396759033203,
"rewards/margins": 7.55600643157959,
"rewards/real": 2.312610149383545,
"step": 440
},
{
"epoch": 1.71,
"learning_rate": 7.934936350777935e-08,
"logits/generated": 3.078356981277466,
"logits/real": 2.535681962966919,
"logps/generated": -660.41943359375,
"logps/real": -1552.767822265625,
"loss": 0.0262,
"rewards/accuracies": 1.0,
"rewards/generated": -5.234507083892822,
"rewards/margins": 7.48751974105835,
"rewards/real": 2.2530131340026855,
"step": 450
},
{
"epoch": 1.75,
"learning_rate": 7.864214992927864e-08,
"logits/generated": 2.9969305992126465,
"logits/real": 2.7245428562164307,
"logps/generated": -742.2290649414062,
"logps/real": -1633.814453125,
"loss": 0.0317,
"rewards/accuracies": 1.0,
"rewards/generated": -5.18667459487915,
"rewards/margins": 7.811495780944824,
"rewards/real": 2.624821186065674,
"step": 460
},
{
"epoch": 1.79,
"learning_rate": 7.793493635077794e-08,
"logits/generated": 3.072664260864258,
"logits/real": 2.67760968208313,
"logps/generated": -669.0721435546875,
"logps/real": -1515.5504150390625,
"loss": 0.0351,
"rewards/accuracies": 1.0,
"rewards/generated": -4.986624717712402,
"rewards/margins": 7.337203025817871,
"rewards/real": 2.350578546524048,
"step": 470
},
{
"epoch": 1.83,
"learning_rate": 7.722772277227723e-08,
"logits/generated": 3.0342142581939697,
"logits/real": 2.5741496086120605,
"logps/generated": -678.1422729492188,
"logps/real": -1544.0634765625,
"loss": 0.0307,
"rewards/accuracies": 1.0,
"rewards/generated": -5.9622602462768555,
"rewards/margins": 8.503713607788086,
"rewards/real": 2.5414538383483887,
"step": 480
},
{
"epoch": 1.87,
"learning_rate": 7.652050919377651e-08,
"logits/generated": 3.1236672401428223,
"logits/real": 2.6698803901672363,
"logps/generated": -673.8851928710938,
"logps/real": -1506.4398193359375,
"loss": 0.0251,
"rewards/accuracies": 1.0,
"rewards/generated": -5.428993225097656,
"rewards/margins": 8.060708999633789,
"rewards/real": 2.631716251373291,
"step": 490
},
{
"epoch": 1.9,
"learning_rate": 7.58132956152758e-08,
"logits/generated": 3.169041633605957,
"logits/real": 2.631805658340454,
"logps/generated": -637.0628662109375,
"logps/real": -1626.2333984375,
"loss": 0.0338,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.156910419464111,
"rewards/margins": 7.718507289886475,
"rewards/real": 2.5615973472595215,
"step": 500
},
{
"epoch": 1.94,
"learning_rate": 7.51060820367751e-08,
"logits/generated": 3.1486570835113525,
"logits/real": 2.661397933959961,
"logps/generated": -655.4933471679688,
"logps/real": -1476.0849609375,
"loss": 0.0256,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.335035800933838,
"rewards/margins": 7.7971930503845215,
"rewards/real": 2.462157726287842,
"step": 510
},
{
"epoch": 1.98,
"learning_rate": 7.43988684582744e-08,
"logits/generated": 2.928063154220581,
"logits/real": 2.730922222137451,
"logps/generated": -652.9177856445312,
"logps/real": -1432.533935546875,
"loss": 0.031,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.431285858154297,
"rewards/margins": 8.037918090820312,
"rewards/real": 2.606632709503174,
"step": 520
},
{
"epoch": 2.02,
"learning_rate": 7.36916548797737e-08,
"logits/generated": 3.1943507194519043,
"logits/real": 2.7164087295532227,
"logps/generated": -679.414794921875,
"logps/real": -1487.551025390625,
"loss": 0.022,
"rewards/accuracies": 1.0,
"rewards/generated": -5.67262601852417,
"rewards/margins": 8.073965072631836,
"rewards/real": 2.4013381004333496,
"step": 530
},
{
"epoch": 2.06,
"learning_rate": 7.298444130127298e-08,
"logits/generated": 3.188331127166748,
"logits/real": 2.6221508979797363,
"logps/generated": -695.04931640625,
"logps/real": -1611.9033203125,
"loss": 0.0215,
"rewards/accuracies": 1.0,
"rewards/generated": -5.9647440910339355,
"rewards/margins": 9.016992568969727,
"rewards/real": 3.052248477935791,
"step": 540
},
{
"epoch": 2.1,
"learning_rate": 7.227722772277227e-08,
"logits/generated": 3.062971591949463,
"logits/real": 2.652564525604248,
"logps/generated": -722.2522583007812,
"logps/real": -1378.610107421875,
"loss": 0.022,
"rewards/accuracies": 1.0,
"rewards/generated": -6.261225700378418,
"rewards/margins": 9.063823699951172,
"rewards/real": 2.802598476409912,
"step": 550
},
{
"epoch": 2.13,
"learning_rate": 7.157001414427156e-08,
"logits/generated": 2.944784641265869,
"logits/real": 2.626530408859253,
"logps/generated": -630.6044311523438,
"logps/real": -1555.120849609375,
"loss": 0.0287,
"rewards/accuracies": 1.0,
"rewards/generated": -5.279501914978027,
"rewards/margins": 8.238837242126465,
"rewards/real": 2.9593350887298584,
"step": 560
},
{
"epoch": 2.17,
"learning_rate": 7.086280056577086e-08,
"logits/generated": 2.974566698074341,
"logits/real": 2.734873056411743,
"logps/generated": -740.10498046875,
"logps/real": -1393.660400390625,
"loss": 0.026,
"rewards/accuracies": 1.0,
"rewards/generated": -6.561972141265869,
"rewards/margins": 9.23666000366211,
"rewards/real": 2.6746881008148193,
"step": 570
},
{
"epoch": 2.21,
"learning_rate": 7.015558698727016e-08,
"logits/generated": 3.162912130355835,
"logits/real": 2.7691640853881836,
"logps/generated": -637.5083618164062,
"logps/real": -1494.177490234375,
"loss": 0.0244,
"rewards/accuracies": 1.0,
"rewards/generated": -5.555890083312988,
"rewards/margins": 8.500304222106934,
"rewards/real": 2.944413900375366,
"step": 580
},
{
"epoch": 2.25,
"learning_rate": 6.944837340876945e-08,
"logits/generated": 2.8717501163482666,
"logits/real": 2.6633965969085693,
"logps/generated": -659.5353393554688,
"logps/real": -1353.6895751953125,
"loss": 0.0217,
"rewards/accuracies": 1.0,
"rewards/generated": -6.05059814453125,
"rewards/margins": 8.580648422241211,
"rewards/real": 2.530050754547119,
"step": 590
},
{
"epoch": 2.29,
"learning_rate": 6.874115983026874e-08,
"logits/generated": 3.1894919872283936,
"logits/real": 2.647244691848755,
"logps/generated": -659.2657470703125,
"logps/real": -1401.873779296875,
"loss": 0.0149,
"rewards/accuracies": 1.0,
"rewards/generated": -5.985430717468262,
"rewards/margins": 8.887848854064941,
"rewards/real": 2.902418375015259,
"step": 600
},
{
"epoch": 2.32,
"learning_rate": 6.803394625176802e-08,
"logits/generated": 3.2617828845977783,
"logits/real": 2.715851306915283,
"logps/generated": -682.93603515625,
"logps/real": -1624.47509765625,
"loss": 0.0211,
"rewards/accuracies": 1.0,
"rewards/generated": -6.639365196228027,
"rewards/margins": 10.307806015014648,
"rewards/real": 3.6684412956237793,
"step": 610
},
{
"epoch": 2.36,
"learning_rate": 6.732673267326733e-08,
"logits/generated": 3.0423474311828613,
"logits/real": 2.620255947113037,
"logps/generated": -686.6878662109375,
"logps/real": -1532.2369384765625,
"loss": 0.0197,
"rewards/accuracies": 1.0,
"rewards/generated": -6.549188137054443,
"rewards/margins": 10.09010124206543,
"rewards/real": 3.5409133434295654,
"step": 620
},
{
"epoch": 2.4,
"learning_rate": 6.661951909476661e-08,
"logits/generated": 3.0638270378112793,
"logits/real": 2.641237258911133,
"logps/generated": -684.035888671875,
"logps/real": -1405.203369140625,
"loss": 0.0182,
"rewards/accuracies": 1.0,
"rewards/generated": -6.292230606079102,
"rewards/margins": 9.292798042297363,
"rewards/real": 3.0005664825439453,
"step": 630
},
{
"epoch": 2.44,
"learning_rate": 6.591230551626592e-08,
"logits/generated": 2.9729528427124023,
"logits/real": 2.662184715270996,
"logps/generated": -651.6361083984375,
"logps/real": -1185.017578125,
"loss": 0.0219,
"rewards/accuracies": 1.0,
"rewards/generated": -6.018342971801758,
"rewards/margins": 8.436049461364746,
"rewards/real": 2.41770601272583,
"step": 640
},
{
"epoch": 2.48,
"learning_rate": 6.52050919377652e-08,
"logits/generated": 2.9936587810516357,
"logits/real": 2.7459309101104736,
"logps/generated": -617.2003784179688,
"logps/real": -1374.72607421875,
"loss": 0.0199,
"rewards/accuracies": 1.0,
"rewards/generated": -6.145440578460693,
"rewards/margins": 9.338098526000977,
"rewards/real": 3.192657470703125,
"step": 650
},
{
"epoch": 2.51,
"learning_rate": 6.449787835926449e-08,
"logits/generated": 3.140695095062256,
"logits/real": 2.72432541847229,
"logps/generated": -712.6917724609375,
"logps/real": -1558.529296875,
"loss": 0.0128,
"rewards/accuracies": 1.0,
"rewards/generated": -7.1376166343688965,
"rewards/margins": 10.716670036315918,
"rewards/real": 3.5790531635284424,
"step": 660
},
{
"epoch": 2.55,
"learning_rate": 6.379066478076379e-08,
"logits/generated": 3.162978172302246,
"logits/real": 2.6974151134490967,
"logps/generated": -722.5013427734375,
"logps/real": -1497.13232421875,
"loss": 0.0184,
"rewards/accuracies": 1.0,
"rewards/generated": -7.2485809326171875,
"rewards/margins": 10.17140007019043,
"rewards/real": 2.922818660736084,
"step": 670
},
{
"epoch": 2.59,
"learning_rate": 6.308345120226308e-08,
"logits/generated": 3.1272811889648438,
"logits/real": 2.774167776107788,
"logps/generated": -650.5689697265625,
"logps/real": -1484.775634765625,
"loss": 0.0181,
"rewards/accuracies": 1.0,
"rewards/generated": -6.5811662673950195,
"rewards/margins": 10.269671440124512,
"rewards/real": 3.6885063648223877,
"step": 680
},
{
"epoch": 2.63,
"learning_rate": 6.237623762376237e-08,
"logits/generated": 3.171654462814331,
"logits/real": 2.742036819458008,
"logps/generated": -754.7232666015625,
"logps/real": -1549.0023193359375,
"loss": 0.0154,
"rewards/accuracies": 1.0,
"rewards/generated": -7.933679103851318,
"rewards/margins": 11.404020309448242,
"rewards/real": 3.4703421592712402,
"step": 690
},
{
"epoch": 2.67,
"learning_rate": 6.166902404526166e-08,
"logits/generated": 3.1139299869537354,
"logits/real": 2.8051939010620117,
"logps/generated": -735.65087890625,
"logps/real": -1367.9779052734375,
"loss": 0.0153,
"rewards/accuracies": 1.0,
"rewards/generated": -7.616179466247559,
"rewards/margins": 11.006625175476074,
"rewards/real": 3.3904449939727783,
"step": 700
},
{
"epoch": 2.7,
"learning_rate": 6.096181046676096e-08,
"logits/generated": 3.1485302448272705,
"logits/real": 2.6787030696868896,
"logps/generated": -647.2530517578125,
"logps/real": -1589.8712158203125,
"loss": 0.0174,
"rewards/accuracies": 1.0,
"rewards/generated": -6.846640110015869,
"rewards/margins": 10.44523811340332,
"rewards/real": 3.598597288131714,
"step": 710
},
{
"epoch": 2.74,
"learning_rate": 6.025459688826026e-08,
"logits/generated": 3.155958890914917,
"logits/real": 2.688934803009033,
"logps/generated": -694.9849243164062,
"logps/real": -1377.62548828125,
"loss": 0.0185,
"rewards/accuracies": 1.0,
"rewards/generated": -7.8145751953125,
"rewards/margins": 11.082307815551758,
"rewards/real": 3.267733097076416,
"step": 720
},
{
"epoch": 2.78,
"learning_rate": 5.954738330975955e-08,
"logits/generated": 2.951596736907959,
"logits/real": 2.6604816913604736,
"logps/generated": -621.7342529296875,
"logps/real": -1276.5732421875,
"loss": 0.0178,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -5.571743488311768,
"rewards/margins": 8.480446815490723,
"rewards/real": 2.908703327178955,
"step": 730
},
{
"epoch": 2.82,
"learning_rate": 5.8840169731258837e-08,
"logits/generated": 2.979721784591675,
"logits/real": 2.7603650093078613,
"logps/generated": -680.7958984375,
"logps/real": -1584.530029296875,
"loss": 0.0171,
"rewards/accuracies": 1.0,
"rewards/generated": -7.498532295227051,
"rewards/margins": 11.40682601928711,
"rewards/real": 3.9082934856414795,
"step": 740
},
{
"epoch": 2.86,
"learning_rate": 5.8132956152758125e-08,
"logits/generated": 3.0327863693237305,
"logits/real": 2.7106642723083496,
"logps/generated": -674.9633178710938,
"logps/real": -1397.305419921875,
"loss": 0.0199,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -6.23696756362915,
"rewards/margins": 9.843786239624023,
"rewards/real": 3.6068196296691895,
"step": 750
},
{
"epoch": 2.9,
"learning_rate": 5.742574257425742e-08,
"logits/generated": 3.1310720443725586,
"logits/real": 2.61232328414917,
"logps/generated": -686.5684814453125,
"logps/real": -1574.0673828125,
"loss": 0.0111,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.091876983642578,
"rewards/margins": 11.274415969848633,
"rewards/real": 4.182539939880371,
"step": 760
},
{
"epoch": 2.93,
"learning_rate": 5.671852899575672e-08,
"logits/generated": 2.9953229427337646,
"logits/real": 2.858652353286743,
"logps/generated": -703.0581665039062,
"logps/real": -1572.0938720703125,
"loss": 0.0172,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -7.434505462646484,
"rewards/margins": 11.167814254760742,
"rewards/real": 3.733309268951416,
"step": 770
},
{
"epoch": 2.97,
"learning_rate": 5.601131541725601e-08,
"logits/generated": 3.2020938396453857,
"logits/real": 2.65535831451416,
"logps/generated": -668.9046630859375,
"logps/real": -1516.696533203125,
"loss": 0.0179,
"rewards/accuracies": 1.0,
"rewards/generated": -7.276400566101074,
"rewards/margins": 11.022329330444336,
"rewards/real": 3.7459285259246826,
"step": 780
},
{
"epoch": 3.01,
"learning_rate": 5.5304101838755304e-08,
"logits/generated": 3.178405523300171,
"logits/real": 2.871461868286133,
"logps/generated": -699.7593994140625,
"logps/real": -1644.784423828125,
"loss": 0.0139,
"rewards/accuracies": 1.0,
"rewards/generated": -7.41660213470459,
"rewards/margins": 11.70053482055664,
"rewards/real": 4.283932685852051,
"step": 790
},
{
"epoch": 3.05,
"learning_rate": 5.459688826025459e-08,
"logits/generated": 3.105088472366333,
"logits/real": 2.7389116287231445,
"logps/generated": -686.2722778320312,
"logps/real": -1577.5899658203125,
"loss": 0.0145,
"rewards/accuracies": 1.0,
"rewards/generated": -7.21103572845459,
"rewards/margins": 11.520840644836426,
"rewards/real": 4.309804439544678,
"step": 800
},
{
"epoch": 3.09,
"learning_rate": 5.388967468175388e-08,
"logits/generated": 3.111755132675171,
"logits/real": 2.6383872032165527,
"logps/generated": -740.4054565429688,
"logps/real": -1518.056884765625,
"loss": 0.0155,
"rewards/accuracies": 1.0,
"rewards/generated": -8.46458625793457,
"rewards/margins": 12.396316528320312,
"rewards/real": 3.9317307472229004,
"step": 810
},
{
"epoch": 3.12,
"learning_rate": 5.318246110325318e-08,
"logits/generated": 3.1267387866973877,
"logits/real": 2.761569023132324,
"logps/generated": -690.6373291015625,
"logps/real": -1433.797119140625,
"loss": 0.0127,
"rewards/accuracies": 1.0,
"rewards/generated": -7.513435363769531,
"rewards/margins": 11.60542106628418,
"rewards/real": 4.091986179351807,
"step": 820
},
{
"epoch": 3.16,
"learning_rate": 5.2475247524752476e-08,
"logits/generated": 3.094365358352661,
"logits/real": 2.7339248657226562,
"logps/generated": -677.4999389648438,
"logps/real": -1461.193603515625,
"loss": 0.0123,
"rewards/accuracies": 1.0,
"rewards/generated": -7.2015275955200195,
"rewards/margins": 10.88963508605957,
"rewards/real": 3.68810772895813,
"step": 830
},
{
"epoch": 3.2,
"learning_rate": 5.1768033946251764e-08,
"logits/generated": 3.1627821922302246,
"logits/real": 2.6869568824768066,
"logps/generated": -708.0393676757812,
"logps/real": -1620.342529296875,
"loss": 0.013,
"rewards/accuracies": 1.0,
"rewards/generated": -6.853402614593506,
"rewards/margins": 11.502897262573242,
"rewards/real": 4.649496078491211,
"step": 840
},
{
"epoch": 3.24,
"learning_rate": 5.106082036775106e-08,
"logits/generated": 2.925158977508545,
"logits/real": 2.806152820587158,
"logps/generated": -670.198486328125,
"logps/real": -1649.2200927734375,
"loss": 0.0075,
"rewards/accuracies": 1.0,
"rewards/generated": -7.177328586578369,
"rewards/margins": 11.58249568939209,
"rewards/real": 4.405167102813721,
"step": 850
},
{
"epoch": 3.28,
"learning_rate": 5.035360678925035e-08,
"logits/generated": 3.0794711112976074,
"logits/real": 2.7551910877227783,
"logps/generated": -688.251708984375,
"logps/real": -1551.3802490234375,
"loss": 0.0092,
"rewards/accuracies": 1.0,
"rewards/generated": -7.524941921234131,
"rewards/margins": 11.831631660461426,
"rewards/real": 4.306689262390137,
"step": 860
},
{
"epoch": 3.31,
"learning_rate": 4.964639321074964e-08,
"logits/generated": 3.2023205757141113,
"logits/real": 2.7396531105041504,
"logps/generated": -726.21484375,
"logps/real": -1432.2078857421875,
"loss": 0.0096,
"rewards/accuracies": 1.0,
"rewards/generated": -8.621014595031738,
"rewards/margins": 12.233930587768555,
"rewards/real": 3.6129164695739746,
"step": 870
},
{
"epoch": 3.35,
"learning_rate": 4.8939179632248937e-08,
"logits/generated": 3.036933422088623,
"logits/real": 2.7342514991760254,
"logps/generated": -679.8505859375,
"logps/real": -1413.323974609375,
"loss": 0.0156,
"rewards/accuracies": 1.0,
"rewards/generated": -7.424284934997559,
"rewards/margins": 11.5066499710083,
"rewards/real": 4.082363605499268,
"step": 880
},
{
"epoch": 3.39,
"learning_rate": 4.823196605374823e-08,
"logits/generated": 3.1470329761505127,
"logits/real": 2.7851274013519287,
"logps/generated": -707.0967407226562,
"logps/real": -1301.5491943359375,
"loss": 0.0129,
"rewards/accuracies": 1.0,
"rewards/generated": -7.919151306152344,
"rewards/margins": 11.449126243591309,
"rewards/real": 3.529975175857544,
"step": 890
},
{
"epoch": 3.43,
"learning_rate": 4.752475247524752e-08,
"logits/generated": 3.122591018676758,
"logits/real": 2.899055242538452,
"logps/generated": -719.7399291992188,
"logps/real": -1345.24267578125,
"loss": 0.0093,
"rewards/accuracies": 1.0,
"rewards/generated": -8.44778060913086,
"rewards/margins": 12.348614692687988,
"rewards/real": 3.9008331298828125,
"step": 900
},
{
"epoch": 3.47,
"learning_rate": 4.6817538896746814e-08,
"logits/generated": 3.1257503032684326,
"logits/real": 2.7537941932678223,
"logps/generated": -675.872314453125,
"logps/real": -1462.6578369140625,
"loss": 0.0083,
"rewards/accuracies": 1.0,
"rewards/generated": -7.721535682678223,
"rewards/margins": 11.623268127441406,
"rewards/real": 3.901732921600342,
"step": 910
},
{
"epoch": 3.5,
"learning_rate": 4.611032531824611e-08,
"logits/generated": 3.1477560997009277,
"logits/real": 2.8998966217041016,
"logps/generated": -659.6651611328125,
"logps/real": -1254.7550048828125,
"loss": 0.011,
"rewards/accuracies": 1.0,
"rewards/generated": -6.890495300292969,
"rewards/margins": 10.485551834106445,
"rewards/real": 3.595057725906372,
"step": 920
},
{
"epoch": 3.54,
"learning_rate": 4.5403111739745404e-08,
"logits/generated": 3.0974771976470947,
"logits/real": 2.7432289123535156,
"logps/generated": -679.0161743164062,
"logps/real": -1514.939453125,
"loss": 0.0153,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -8.148828506469727,
"rewards/margins": 12.779142379760742,
"rewards/real": 4.630312919616699,
"step": 930
},
{
"epoch": 3.58,
"learning_rate": 4.469589816124469e-08,
"logits/generated": 3.1638052463531494,
"logits/real": 2.8109536170959473,
"logps/generated": -698.923828125,
"logps/real": -1472.047119140625,
"loss": 0.012,
"rewards/accuracies": 1.0,
"rewards/generated": -7.732884407043457,
"rewards/margins": 11.864561080932617,
"rewards/real": 4.13167667388916,
"step": 940
},
{
"epoch": 3.62,
"learning_rate": 4.3988684582743986e-08,
"logits/generated": 3.1099958419799805,
"logits/real": 2.689439296722412,
"logps/generated": -644.0321044921875,
"logps/real": -1493.985107421875,
"loss": 0.0103,
"rewards/accuracies": 1.0,
"rewards/generated": -7.19980001449585,
"rewards/margins": 11.551348686218262,
"rewards/real": 4.351548194885254,
"step": 950
},
{
"epoch": 3.66,
"learning_rate": 4.328147100424328e-08,
"logits/generated": 3.068477153778076,
"logits/real": 2.7735862731933594,
"logps/generated": -691.45166015625,
"logps/real": -1532.789306640625,
"loss": 0.0079,
"rewards/accuracies": 1.0,
"rewards/generated": -7.632587432861328,
"rewards/margins": 12.373538970947266,
"rewards/real": 4.740950584411621,
"step": 960
},
{
"epoch": 3.7,
"learning_rate": 4.257425742574257e-08,
"logits/generated": 3.0876381397247314,
"logits/real": 2.819197177886963,
"logps/generated": -723.0416259765625,
"logps/real": -1523.8883056640625,
"loss": 0.0072,
"rewards/accuracies": 1.0,
"rewards/generated": -8.482768058776855,
"rewards/margins": 12.97950553894043,
"rewards/real": 4.496739387512207,
"step": 970
},
{
"epoch": 3.73,
"learning_rate": 4.186704384724187e-08,
"logits/generated": 3.1232168674468994,
"logits/real": 2.7532286643981934,
"logps/generated": -705.3154296875,
"logps/real": -1505.3544921875,
"loss": 0.007,
"rewards/accuracies": 1.0,
"rewards/generated": -9.721161842346191,
"rewards/margins": 14.407315254211426,
"rewards/real": 4.686154365539551,
"step": 980
},
{
"epoch": 3.77,
"learning_rate": 4.115983026874116e-08,
"logits/generated": 3.03477144241333,
"logits/real": 2.8783843517303467,
"logps/generated": -643.8148803710938,
"logps/real": -1341.3072509765625,
"loss": 0.0125,
"rewards/accuracies": 1.0,
"rewards/generated": -8.113399505615234,
"rewards/margins": 12.22516918182373,
"rewards/real": 4.111769199371338,
"step": 990
},
{
"epoch": 3.81,
"learning_rate": 4.045261669024045e-08,
"logits/generated": 3.1741697788238525,
"logits/real": 2.8709182739257812,
"logps/generated": -732.4049682617188,
"logps/real": -1317.285400390625,
"loss": 0.0067,
"rewards/accuracies": 1.0,
"rewards/generated": -9.147187232971191,
"rewards/margins": 12.971673965454102,
"rewards/real": 3.8244857788085938,
"step": 1000
},
{
"epoch": 3.85,
"learning_rate": 3.974540311173974e-08,
"logits/generated": 3.0487265586853027,
"logits/real": 2.70530366897583,
"logps/generated": -710.79931640625,
"logps/real": -1459.953125,
"loss": 0.0078,
"rewards/accuracies": 1.0,
"rewards/generated": -8.931787490844727,
"rewards/margins": 12.98430347442627,
"rewards/real": 4.052516460418701,
"step": 1010
},
{
"epoch": 3.89,
"learning_rate": 3.9038189533239036e-08,
"logits/generated": 3.2147929668426514,
"logits/real": 2.8529582023620605,
"logps/generated": -705.3197021484375,
"logps/real": -1510.0345458984375,
"loss": 0.0087,
"rewards/accuracies": 1.0,
"rewards/generated": -9.200128555297852,
"rewards/margins": 13.442059516906738,
"rewards/real": 4.241931438446045,
"step": 1020
},
{
"epoch": 3.92,
"learning_rate": 3.833097595473833e-08,
"logits/generated": 3.0952019691467285,
"logits/real": 2.843813419342041,
"logps/generated": -767.7200317382812,
"logps/real": -1536.8294677734375,
"loss": 0.007,
"rewards/accuracies": 1.0,
"rewards/generated": -9.248136520385742,
"rewards/margins": 14.309036254882812,
"rewards/real": 5.06089973449707,
"step": 1030
},
{
"epoch": 3.96,
"learning_rate": 3.762376237623762e-08,
"logits/generated": 3.064669609069824,
"logits/real": 2.8759753704071045,
"logps/generated": -747.2686767578125,
"logps/real": -1288.828857421875,
"loss": 0.011,
"rewards/accuracies": 1.0,
"rewards/generated": -9.522977828979492,
"rewards/margins": 13.202028274536133,
"rewards/real": 3.679051637649536,
"step": 1040
},
{
"epoch": 4.0,
"learning_rate": 3.6916548797736914e-08,
"logits/generated": 3.093175172805786,
"logits/real": 2.7089123725891113,
"logps/generated": -727.3369750976562,
"logps/real": -1490.734619140625,
"loss": 0.0072,
"rewards/accuracies": 1.0,
"rewards/generated": -9.388141632080078,
"rewards/margins": 14.153898239135742,
"rewards/real": 4.765757083892822,
"step": 1050
},
{
"epoch": 4.04,
"learning_rate": 3.620933521923621e-08,
"logits/generated": 3.0252323150634766,
"logits/real": 2.6994900703430176,
"logps/generated": -706.836181640625,
"logps/real": -1605.043701171875,
"loss": 0.0071,
"rewards/accuracies": 1.0,
"rewards/generated": -8.568842887878418,
"rewards/margins": 13.715721130371094,
"rewards/real": 5.146877288818359,
"step": 1060
},
{
"epoch": 4.08,
"learning_rate": 3.55021216407355e-08,
"logits/generated": 3.1279215812683105,
"logits/real": 2.813671588897705,
"logps/generated": -725.0180053710938,
"logps/real": -1311.329833984375,
"loss": 0.0056,
"rewards/accuracies": 1.0,
"rewards/generated": -9.17328929901123,
"rewards/margins": 13.425753593444824,
"rewards/real": 4.252464294433594,
"step": 1070
},
{
"epoch": 4.11,
"learning_rate": 3.47949080622348e-08,
"logits/generated": 3.1973278522491455,
"logits/real": 2.841034412384033,
"logps/generated": -739.7282104492188,
"logps/real": -1421.683349609375,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/generated": -9.601125717163086,
"rewards/margins": 13.759020805358887,
"rewards/real": 4.157895088195801,
"step": 1080
},
{
"epoch": 4.15,
"learning_rate": 3.4087694483734086e-08,
"logits/generated": 3.14319109916687,
"logits/real": 2.650394916534424,
"logps/generated": -694.9728393554688,
"logps/real": -1557.013671875,
"loss": 0.0075,
"rewards/accuracies": 1.0,
"rewards/generated": -8.979018211364746,
"rewards/margins": 14.022770881652832,
"rewards/real": 5.043752193450928,
"step": 1090
},
{
"epoch": 4.19,
"learning_rate": 3.3380480905233374e-08,
"logits/generated": 3.283674716949463,
"logits/real": 2.774038791656494,
"logps/generated": -750.1487426757812,
"logps/real": -1579.89697265625,
"loss": 0.0086,
"rewards/accuracies": 1.0,
"rewards/generated": -10.157815933227539,
"rewards/margins": 14.974313735961914,
"rewards/real": 4.816495895385742,
"step": 1100
},
{
"epoch": 4.23,
"learning_rate": 3.2673267326732676e-08,
"logits/generated": 3.134382963180542,
"logits/real": 2.8532094955444336,
"logps/generated": -750.8877563476562,
"logps/real": -1525.961669921875,
"loss": 0.0077,
"rewards/accuracies": 1.0,
"rewards/generated": -9.825960159301758,
"rewards/margins": 14.588778495788574,
"rewards/real": 4.762818336486816,
"step": 1110
},
{
"epoch": 4.27,
"learning_rate": 3.1966053748231964e-08,
"logits/generated": 3.130510091781616,
"logits/real": 2.9025044441223145,
"logps/generated": -675.0423583984375,
"logps/real": -1538.1998291015625,
"loss": 0.0098,
"rewards/accuracies": 1.0,
"rewards/generated": -9.219243049621582,
"rewards/margins": 14.35087776184082,
"rewards/real": 5.131636619567871,
"step": 1120
},
{
"epoch": 4.3,
"learning_rate": 3.125884016973126e-08,
"logits/generated": 3.234394073486328,
"logits/real": 2.7886247634887695,
"logps/generated": -688.5629272460938,
"logps/real": -1481.4114990234375,
"loss": 0.0074,
"rewards/accuracies": 1.0,
"rewards/generated": -8.884227752685547,
"rewards/margins": 13.749975204467773,
"rewards/real": 4.865747928619385,
"step": 1130
},
{
"epoch": 4.34,
"learning_rate": 3.0551626591230553e-08,
"logits/generated": 3.151052474975586,
"logits/real": 2.8268675804138184,
"logps/generated": -702.4296264648438,
"logps/real": -1545.9931640625,
"loss": 0.005,
"rewards/accuracies": 1.0,
"rewards/generated": -8.792219161987305,
"rewards/margins": 13.802957534790039,
"rewards/real": 5.010737419128418,
"step": 1140
},
{
"epoch": 4.38,
"learning_rate": 2.984441301272984e-08,
"logits/generated": 3.0674405097961426,
"logits/real": 2.791660785675049,
"logps/generated": -728.8613891601562,
"logps/real": -1402.8472900390625,
"loss": 0.0044,
"rewards/accuracies": 1.0,
"rewards/generated": -9.552156448364258,
"rewards/margins": 13.963113784790039,
"rewards/real": 4.410956382751465,
"step": 1150
},
{
"epoch": 4.42,
"learning_rate": 2.9137199434229136e-08,
"logits/generated": 3.085538864135742,
"logits/real": 2.8696396350860596,
"logps/generated": -699.1375732421875,
"logps/real": -1528.8197021484375,
"loss": 0.0061,
"rewards/accuracies": 1.0,
"rewards/generated": -9.45177173614502,
"rewards/margins": 14.307217597961426,
"rewards/real": 4.8554463386535645,
"step": 1160
},
{
"epoch": 4.46,
"learning_rate": 2.8429985855728428e-08,
"logits/generated": 3.1769864559173584,
"logits/real": 2.864468574523926,
"logps/generated": -730.0369262695312,
"logps/real": -1405.126220703125,
"loss": 0.006,
"rewards/accuracies": 1.0,
"rewards/generated": -10.242330551147461,
"rewards/margins": 14.709861755371094,
"rewards/real": 4.467529296875,
"step": 1170
},
{
"epoch": 4.5,
"learning_rate": 2.7722772277227722e-08,
"logits/generated": 3.0584521293640137,
"logits/real": 2.7300102710723877,
"logps/generated": -722.6023559570312,
"logps/real": -1533.9681396484375,
"loss": 0.0089,
"rewards/accuracies": 1.0,
"rewards/generated": -10.0105562210083,
"rewards/margins": 14.953516960144043,
"rewards/real": 4.942961692810059,
"step": 1180
},
{
"epoch": 4.53,
"learning_rate": 2.7015558698727014e-08,
"logits/generated": 3.1814303398132324,
"logits/real": 2.767988681793213,
"logps/generated": -705.47021484375,
"logps/real": -1517.984619140625,
"loss": 0.0052,
"rewards/accuracies": 1.0,
"rewards/generated": -10.146055221557617,
"rewards/margins": 15.356274604797363,
"rewards/real": 5.210217475891113,
"step": 1190
},
{
"epoch": 4.57,
"learning_rate": 2.6308345120226305e-08,
"logits/generated": 3.2221198081970215,
"logits/real": 2.8411731719970703,
"logps/generated": -727.7474365234375,
"logps/real": -1452.932373046875,
"loss": 0.0045,
"rewards/accuracies": 1.0,
"rewards/generated": -10.274625778198242,
"rewards/margins": 15.288644790649414,
"rewards/real": 5.014019012451172,
"step": 1200
},
{
"epoch": 4.61,
"learning_rate": 2.56011315417256e-08,
"logits/generated": 2.9806618690490723,
"logits/real": 2.761728286743164,
"logps/generated": -739.073974609375,
"logps/real": -1438.574951171875,
"loss": 0.0054,
"rewards/accuracies": 1.0,
"rewards/generated": -10.485044479370117,
"rewards/margins": 15.389866828918457,
"rewards/real": 4.904819965362549,
"step": 1210
},
{
"epoch": 4.65,
"learning_rate": 2.489391796322489e-08,
"logits/generated": 3.0833356380462646,
"logits/real": 2.8559961318969727,
"logps/generated": -737.3714599609375,
"logps/real": -1319.4708251953125,
"loss": 0.0088,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -9.90731430053711,
"rewards/margins": 14.22021484375,
"rewards/real": 4.312900066375732,
"step": 1220
},
{
"epoch": 4.69,
"learning_rate": 2.4186704384724186e-08,
"logits/generated": 3.1098473072052,
"logits/real": 2.8987298011779785,
"logps/generated": -751.8609619140625,
"logps/real": -1390.7666015625,
"loss": 0.0063,
"rewards/accuracies": 1.0,
"rewards/generated": -10.742658615112305,
"rewards/margins": 15.709897994995117,
"rewards/real": 4.967240810394287,
"step": 1230
},
{
"epoch": 4.72,
"learning_rate": 2.3479490806223478e-08,
"logits/generated": 3.181952476501465,
"logits/real": 2.8264319896698,
"logps/generated": -725.786865234375,
"logps/real": -1668.9781494140625,
"loss": 0.0057,
"rewards/accuracies": 1.0,
"rewards/generated": -9.877522468566895,
"rewards/margins": 15.596285820007324,
"rewards/real": 5.7187628746032715,
"step": 1240
},
{
"epoch": 4.76,
"learning_rate": 2.2772277227722772e-08,
"logits/generated": 3.2247843742370605,
"logits/real": 2.787977695465088,
"logps/generated": -689.7197265625,
"logps/real": -1527.9981689453125,
"loss": 0.0069,
"rewards/accuracies": 1.0,
"rewards/generated": -9.805652618408203,
"rewards/margins": 14.870402336120605,
"rewards/real": 5.064749240875244,
"step": 1250
},
{
"epoch": 4.8,
"learning_rate": 2.2065063649222067e-08,
"logits/generated": 3.1002349853515625,
"logits/real": 2.8746185302734375,
"logps/generated": -755.68994140625,
"logps/real": -1484.486328125,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -9.283646583557129,
"rewards/margins": 14.29266357421875,
"rewards/real": 5.009018421173096,
"step": 1260
},
{
"epoch": 4.84,
"learning_rate": 2.1357850070721355e-08,
"logits/generated": 3.172830104827881,
"logits/real": 2.891188859939575,
"logps/generated": -711.9218139648438,
"logps/real": -1525.380615234375,
"loss": 0.0075,
"rewards/accuracies": 1.0,
"rewards/generated": -9.644311904907227,
"rewards/margins": 14.991864204406738,
"rewards/real": 5.3475518226623535,
"step": 1270
},
{
"epoch": 4.88,
"learning_rate": 2.065063649222065e-08,
"logits/generated": 3.162585735321045,
"logits/real": 2.814383029937744,
"logps/generated": -691.09375,
"logps/real": -1261.48046875,
"loss": 0.0063,
"rewards/accuracies": 1.0,
"rewards/generated": -8.76778793334961,
"rewards/margins": 13.0126953125,
"rewards/real": 4.244906425476074,
"step": 1280
},
{
"epoch": 4.91,
"learning_rate": 1.994342291371994e-08,
"logits/generated": 3.1490421295166016,
"logits/real": 2.8507959842681885,
"logps/generated": -685.4339599609375,
"logps/real": -1518.389404296875,
"loss": 0.0059,
"rewards/accuracies": 1.0,
"rewards/generated": -9.577953338623047,
"rewards/margins": 14.928466796875,
"rewards/real": 5.350512504577637,
"step": 1290
},
{
"epoch": 4.95,
"learning_rate": 1.9236209335219236e-08,
"logits/generated": 3.1204447746276855,
"logits/real": 2.761258125305176,
"logps/generated": -718.8822021484375,
"logps/real": -1507.6552734375,
"loss": 0.0062,
"rewards/accuracies": 1.0,
"rewards/generated": -10.944616317749023,
"rewards/margins": 15.193156242370605,
"rewards/real": 4.248539447784424,
"step": 1300
},
{
"epoch": 4.99,
"learning_rate": 1.8528995756718528e-08,
"logits/generated": 3.202990770339966,
"logits/real": 2.789196014404297,
"logps/generated": -736.8391723632812,
"logps/real": -1285.6285400390625,
"loss": 0.0069,
"rewards/accuracies": 1.0,
"rewards/generated": -9.708449363708496,
"rewards/margins": 14.332748413085938,
"rewards/real": 4.624300003051758,
"step": 1310
},
{
"epoch": 5.03,
"learning_rate": 1.782178217821782e-08,
"logits/generated": 3.044565200805664,
"logits/real": 2.813560724258423,
"logps/generated": -749.41845703125,
"logps/real": -1450.473876953125,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/generated": -9.962289810180664,
"rewards/margins": 15.018086433410645,
"rewards/real": 5.055794715881348,
"step": 1320
},
{
"epoch": 5.07,
"learning_rate": 1.7114568599717114e-08,
"logits/generated": 3.1547980308532715,
"logits/real": 2.8710808753967285,
"logps/generated": -685.3002319335938,
"logps/real": -1505.046875,
"loss": 0.0042,
"rewards/accuracies": 1.0,
"rewards/generated": -10.081647872924805,
"rewards/margins": 15.215327262878418,
"rewards/real": 5.133678913116455,
"step": 1330
},
{
"epoch": 5.1,
"learning_rate": 1.640735502121641e-08,
"logits/generated": 3.111386299133301,
"logits/real": 2.878361701965332,
"logps/generated": -712.4747924804688,
"logps/real": -1361.859375,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/generated": -9.593215942382812,
"rewards/margins": 13.842330932617188,
"rewards/real": 4.249114990234375,
"step": 1340
},
{
"epoch": 5.14,
"learning_rate": 1.57001414427157e-08,
"logits/generated": 3.1498868465423584,
"logits/real": 2.785670518875122,
"logps/generated": -749.1596069335938,
"logps/real": -1366.0186767578125,
"loss": 0.0041,
"rewards/accuracies": 1.0,
"rewards/generated": -10.603869438171387,
"rewards/margins": 15.060259819030762,
"rewards/real": 4.45639181137085,
"step": 1350
},
{
"epoch": 5.18,
"learning_rate": 1.499292786421499e-08,
"logits/generated": 3.2311694622039795,
"logits/real": 2.8071227073669434,
"logps/generated": -698.42333984375,
"logps/real": -1556.6697998046875,
"loss": 0.0045,
"rewards/accuracies": 1.0,
"rewards/generated": -10.801340103149414,
"rewards/margins": 16.656749725341797,
"rewards/real": 5.855408668518066,
"step": 1360
},
{
"epoch": 5.22,
"learning_rate": 1.4285714285714284e-08,
"logits/generated": 3.204010486602783,
"logits/real": 2.8270351886749268,
"logps/generated": -768.1214599609375,
"logps/real": -1522.0247802734375,
"loss": 0.0037,
"rewards/accuracies": 1.0,
"rewards/generated": -11.407632827758789,
"rewards/margins": 16.46245574951172,
"rewards/real": 5.054823875427246,
"step": 1370
},
{
"epoch": 5.26,
"learning_rate": 1.3578500707213577e-08,
"logits/generated": 3.2223219871520996,
"logits/real": 2.748478412628174,
"logps/generated": -709.1806640625,
"logps/real": -1449.1051025390625,
"loss": 0.0042,
"rewards/accuracies": 1.0,
"rewards/generated": -10.24437427520752,
"rewards/margins": 15.332929611206055,
"rewards/real": 5.088555335998535,
"step": 1380
},
{
"epoch": 5.3,
"learning_rate": 1.287128712871287e-08,
"logits/generated": 2.949439525604248,
"logits/real": 2.8462400436401367,
"logps/generated": -702.388427734375,
"logps/real": -1221.220458984375,
"loss": 0.0065,
"rewards/accuracies": 1.0,
"rewards/generated": -9.625406265258789,
"rewards/margins": 13.894259452819824,
"rewards/real": 4.268852710723877,
"step": 1390
},
{
"epoch": 5.33,
"learning_rate": 1.2164073550212164e-08,
"logits/generated": 3.1298446655273438,
"logits/real": 2.8281898498535156,
"logps/generated": -750.8180541992188,
"logps/real": -1293.175048828125,
"loss": 0.0056,
"rewards/accuracies": 1.0,
"rewards/generated": -10.595232009887695,
"rewards/margins": 15.49177360534668,
"rewards/real": 4.896543025970459,
"step": 1400
},
{
"epoch": 5.37,
"learning_rate": 1.1456859971711457e-08,
"logits/generated": 3.046416759490967,
"logits/real": 2.7686657905578613,
"logps/generated": -703.9090576171875,
"logps/real": -1550.712158203125,
"loss": 0.005,
"rewards/accuracies": 1.0,
"rewards/generated": -9.808374404907227,
"rewards/margins": 15.086359024047852,
"rewards/real": 5.277985572814941,
"step": 1410
},
{
"epoch": 5.41,
"learning_rate": 1.074964639321075e-08,
"logits/generated": 3.1554598808288574,
"logits/real": 2.8488306999206543,
"logps/generated": -740.9146118164062,
"logps/real": -1494.5152587890625,
"loss": 0.0043,
"rewards/accuracies": 1.0,
"rewards/generated": -10.964345932006836,
"rewards/margins": 16.294296264648438,
"rewards/real": 5.329949378967285,
"step": 1420
},
{
"epoch": 5.45,
"learning_rate": 1.0042432814710041e-08,
"logits/generated": 3.15816330909729,
"logits/real": 2.857891082763672,
"logps/generated": -700.6273803710938,
"logps/real": -1395.680419921875,
"loss": 0.0059,
"rewards/accuracies": 1.0,
"rewards/generated": -10.415566444396973,
"rewards/margins": 14.575197219848633,
"rewards/real": 4.15963077545166,
"step": 1430
},
{
"epoch": 5.49,
"learning_rate": 9.335219236209336e-09,
"logits/generated": 3.150120496749878,
"logits/real": 2.801614761352539,
"logps/generated": -702.9065551757812,
"logps/real": -1336.076904296875,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/generated": -10.592188835144043,
"rewards/margins": 15.421045303344727,
"rewards/real": 4.828855037689209,
"step": 1440
},
{
"epoch": 5.52,
"learning_rate": 8.628005657708627e-09,
"logits/generated": 3.075422525405884,
"logits/real": 2.759566307067871,
"logps/generated": -750.0064086914062,
"logps/real": -1322.994140625,
"loss": 0.0064,
"rewards/accuracies": 1.0,
"rewards/generated": -11.502080917358398,
"rewards/margins": 16.186368942260742,
"rewards/real": 4.68428897857666,
"step": 1450
},
{
"epoch": 5.56,
"learning_rate": 7.92079207920792e-09,
"logits/generated": 2.975106716156006,
"logits/real": 2.8558712005615234,
"logps/generated": -730.2501831054688,
"logps/real": -1330.128173828125,
"loss": 0.0043,
"rewards/accuracies": 1.0,
"rewards/generated": -10.35800552368164,
"rewards/margins": 14.964300155639648,
"rewards/real": 4.60629415512085,
"step": 1460
},
{
"epoch": 5.6,
"learning_rate": 7.2135785007072135e-09,
"logits/generated": 3.338933229446411,
"logits/real": 2.866070032119751,
"logps/generated": -702.8323974609375,
"logps/real": -1660.9990234375,
"loss": 0.0072,
"rewards/accuracies": 1.0,
"rewards/generated": -10.770402908325195,
"rewards/margins": 16.416067123413086,
"rewards/real": 5.645665645599365,
"step": 1470
},
{
"epoch": 5.64,
"learning_rate": 6.506364922206506e-09,
"logits/generated": 3.193213701248169,
"logits/real": 2.8589539527893066,
"logps/generated": -730.3358154296875,
"logps/real": -1374.8104248046875,
"loss": 0.0053,
"rewards/accuracies": 1.0,
"rewards/generated": -10.911657333374023,
"rewards/margins": 15.75550651550293,
"rewards/real": 4.843849182128906,
"step": 1480
},
{
"epoch": 5.68,
"learning_rate": 5.799151343705799e-09,
"logits/generated": 3.0994668006896973,
"logits/real": 2.7762365341186523,
"logps/generated": -667.8460693359375,
"logps/real": -1382.4453125,
"loss": 0.0047,
"rewards/accuracies": 1.0,
"rewards/generated": -9.389671325683594,
"rewards/margins": 14.397994995117188,
"rewards/real": 5.008322238922119,
"step": 1490
},
{
"epoch": 5.71,
"learning_rate": 5.091937765205092e-09,
"logits/generated": 3.0917019844055176,
"logits/real": 2.8576996326446533,
"logps/generated": -709.4324340820312,
"logps/real": -1651.778076171875,
"loss": 0.0048,
"rewards/accuracies": 1.0,
"rewards/generated": -10.680952072143555,
"rewards/margins": 16.81113052368164,
"rewards/real": 6.130180358886719,
"step": 1500
},
{
"epoch": 5.75,
"learning_rate": 4.384724186704385e-09,
"logits/generated": 3.144845724105835,
"logits/real": 2.8753390312194824,
"logps/generated": -750.3989868164062,
"logps/real": -1244.6375732421875,
"loss": 0.0051,
"rewards/accuracies": 1.0,
"rewards/generated": -11.346524238586426,
"rewards/margins": 16.04071807861328,
"rewards/real": 4.694192409515381,
"step": 1510
},
{
"epoch": 5.79,
"learning_rate": 3.6775106082036773e-09,
"logits/generated": 3.173389196395874,
"logits/real": 2.8000612258911133,
"logps/generated": -695.5457763671875,
"logps/real": -1390.158935546875,
"loss": 0.0047,
"rewards/accuracies": 1.0,
"rewards/generated": -9.513983726501465,
"rewards/margins": 14.579229354858398,
"rewards/real": 5.065245151519775,
"step": 1520
},
{
"epoch": 5.83,
"learning_rate": 2.97029702970297e-09,
"logits/generated": 3.1202340126037598,
"logits/real": 2.816042900085449,
"logps/generated": -763.26806640625,
"logps/real": -1385.561279296875,
"loss": 0.0052,
"rewards/accuracies": 1.0,
"rewards/generated": -11.074748039245605,
"rewards/margins": 16.243824005126953,
"rewards/real": 5.169075965881348,
"step": 1530
},
{
"epoch": 5.87,
"learning_rate": 2.263083451202263e-09,
"logits/generated": 3.286569118499756,
"logits/real": 2.9330081939697266,
"logps/generated": -684.3175659179688,
"logps/real": -1652.4827880859375,
"loss": 0.0044,
"rewards/accuracies": 1.0,
"rewards/generated": -10.155891418457031,
"rewards/margins": 15.940603256225586,
"rewards/real": 5.784712314605713,
"step": 1540
},
{
"epoch": 5.9,
"learning_rate": 1.5558698727015557e-09,
"logits/generated": 3.17760968208313,
"logits/real": 2.854255199432373,
"logps/generated": -746.004150390625,
"logps/real": -1345.9742431640625,
"loss": 0.0038,
"rewards/accuracies": 1.0,
"rewards/generated": -11.761791229248047,
"rewards/margins": 16.628210067749023,
"rewards/real": 4.86641788482666,
"step": 1550
},
{
"epoch": 5.94,
"learning_rate": 8.486562942008486e-10,
"logits/generated": 3.056246280670166,
"logits/real": 2.8659956455230713,
"logps/generated": -775.7562255859375,
"logps/real": -1462.85009765625,
"loss": 0.0045,
"rewards/accuracies": 1.0,
"rewards/generated": -11.534282684326172,
"rewards/margins": 16.907535552978516,
"rewards/real": 5.3732523918151855,
"step": 1560
},
{
"epoch": 5.98,
"learning_rate": 1.4144271570014144e-10,
"logits/generated": 3.131124496459961,
"logits/real": 2.8286118507385254,
"logps/generated": -742.1873779296875,
"logps/real": -1465.8543701171875,
"loss": 0.0037,
"rewards/accuracies": 1.0,
"rewards/generated": -10.972893714904785,
"rewards/margins": 16.086231231689453,
"rewards/real": 5.113335609436035,
"step": 1570
},
{
"epoch": 5.99,
"step": 1572,
"total_flos": 0.0,
"train_loss": 0.057728804025485636,
"train_runtime": 21391.1697,
"train_samples_per_second": 4.712,
"train_steps_per_second": 0.073
}
],
"logging_steps": 10,
"max_steps": 1572,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}