|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.988571428571428, |
|
"eval_steps": 100, |
|
"global_step": 1572, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.1645569620253163e-09, |
|
"logits/generated": 3.512105703353882, |
|
"logits/real": 3.202550172805786, |
|
"logps/generated": -465.1200256347656, |
|
"logps/real": -1523.068603515625, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 3.1645569620253166e-08, |
|
"logits/generated": 3.52996826171875, |
|
"logits/real": 3.1474177837371826, |
|
"logps/generated": -455.0594177246094, |
|
"logps/real": -1564.237548828125, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/generated": 0.00841978657990694, |
|
"rewards/margins": 0.014310520142316818, |
|
"rewards/real": 0.022730309516191483, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 6.329113924050633e-08, |
|
"logits/generated": 3.4747085571289062, |
|
"logits/real": 2.9607560634613037, |
|
"logps/generated": -404.8710632324219, |
|
"logps/real": -1600.1513671875, |
|
"loss": 0.5693, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/generated": -0.09713525325059891, |
|
"rewards/margins": 0.30425113439559937, |
|
"rewards/real": 0.20711591839790344, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 9.49367088607595e-08, |
|
"logits/generated": 3.6858959197998047, |
|
"logits/real": 3.0412940979003906, |
|
"logps/generated": -421.63653564453125, |
|
"logps/real": -1533.8973388671875, |
|
"loss": 0.3593, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/generated": -0.3924674689769745, |
|
"rewards/margins": 0.9940061569213867, |
|
"rewards/real": 0.6015387177467346, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.2658227848101266e-07, |
|
"logits/generated": 3.7982068061828613, |
|
"logits/real": 2.8881797790527344, |
|
"logps/generated": -403.89447021484375, |
|
"logps/real": -2011.547607421875, |
|
"loss": 0.2263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.6449347734451294, |
|
"rewards/margins": 1.951049566268921, |
|
"rewards/real": 1.3061145544052124, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.5822784810126582e-07, |
|
"logits/generated": 3.733821153640747, |
|
"logits/real": 2.91530442237854, |
|
"logps/generated": -424.7178649902344, |
|
"logps/real": -1627.3607177734375, |
|
"loss": 0.1737, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.025488257408142, |
|
"rewards/margins": 2.2531561851501465, |
|
"rewards/real": 1.2276678085327148, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.89873417721519e-07, |
|
"logits/generated": 3.667829990386963, |
|
"logits/real": 2.897120237350464, |
|
"logps/generated": -397.2341003417969, |
|
"logps/real": -1588.1214599609375, |
|
"loss": 0.1079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.396870493888855, |
|
"rewards/margins": 3.004943370819092, |
|
"rewards/real": 1.6080729961395264, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.2151898734177212e-07, |
|
"logits/generated": 3.703988552093506, |
|
"logits/real": 3.3058135509490967, |
|
"logps/generated": -450.5978088378906, |
|
"logps/real": -1583.788818359375, |
|
"loss": 0.0638, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.1847541332244873, |
|
"rewards/margins": 4.125738620758057, |
|
"rewards/real": 1.9409840106964111, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.5316455696202533e-07, |
|
"logits/generated": 3.5381362438201904, |
|
"logits/real": 3.0336380004882812, |
|
"logps/generated": -426.459716796875, |
|
"logps/real": -1585.524658203125, |
|
"loss": 0.0623, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -1.9288753271102905, |
|
"rewards/margins": 4.461968898773193, |
|
"rewards/real": 2.533093214035034, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.848101265822785e-07, |
|
"logits/generated": 3.9699149131774902, |
|
"logits/real": 2.959277868270874, |
|
"logps/generated": -462.84478759765625, |
|
"logps/real": -1669.855712890625, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.831137180328369, |
|
"rewards/margins": 5.640450477600098, |
|
"rewards/real": 2.8093132972717285, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.1645569620253163e-07, |
|
"logits/generated": 3.649505615234375, |
|
"logits/real": 3.228405475616455, |
|
"logps/generated": -463.5885314941406, |
|
"logps/real": -1659.1978759765625, |
|
"loss": 0.0399, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.8427340984344482, |
|
"rewards/margins": 6.116082668304443, |
|
"rewards/real": 3.273348331451416, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.481012658227848e-07, |
|
"logits/generated": 3.5634586811065674, |
|
"logits/real": 3.0387678146362305, |
|
"logps/generated": -475.7396545410156, |
|
"logps/real": -1772.5689697265625, |
|
"loss": 0.043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.7554657459259033, |
|
"rewards/margins": 6.204745292663574, |
|
"rewards/real": 3.449280261993408, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.79746835443038e-07, |
|
"logits/generated": 3.531033992767334, |
|
"logits/real": 3.0381414890289307, |
|
"logps/generated": -435.5464782714844, |
|
"logps/real": -1628.22119140625, |
|
"loss": 0.0403, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -2.647515296936035, |
|
"rewards/margins": 5.9579033851623535, |
|
"rewards/real": 3.3103878498077393, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.1139240506329117e-07, |
|
"logits/generated": 3.513434648513794, |
|
"logits/real": 2.99871826171875, |
|
"logps/generated": -438.3785095214844, |
|
"logps/real": -1705.972412109375, |
|
"loss": 0.0341, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.1811232566833496, |
|
"rewards/margins": 7.164037227630615, |
|
"rewards/real": 3.982914447784424, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.4303797468354424e-07, |
|
"logits/generated": 3.7991561889648438, |
|
"logits/real": 3.105447292327881, |
|
"logps/generated": -479.6163635253906, |
|
"logps/real": -1677.7916259765625, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.3546764850616455, |
|
"rewards/margins": 7.303455352783203, |
|
"rewards/real": 3.9487788677215576, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.746835443037974e-07, |
|
"logits/generated": 3.594744920730591, |
|
"logits/real": 2.9824934005737305, |
|
"logps/generated": -436.2431640625, |
|
"logps/real": -1721.3759765625, |
|
"loss": 0.0232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.755667209625244, |
|
"rewards/margins": 7.839377403259277, |
|
"rewards/real": 4.083710670471191, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.992927864214993e-07, |
|
"logits/generated": 3.6612861156463623, |
|
"logits/real": 2.9343557357788086, |
|
"logps/generated": -483.659912109375, |
|
"logps/real": -1582.573486328125, |
|
"loss": 0.0205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.5960216522216797, |
|
"rewards/margins": 7.640145301818848, |
|
"rewards/real": 4.04412317276001, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.957567185289958e-07, |
|
"logits/generated": 3.589632034301758, |
|
"logits/real": 3.0019402503967285, |
|
"logps/generated": -425.794677734375, |
|
"logps/real": -1487.097412109375, |
|
"loss": 0.0221, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.8624930381774902, |
|
"rewards/margins": 8.045273780822754, |
|
"rewards/real": 4.1827802658081055, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.922206506364922e-07, |
|
"logits/generated": 3.754439115524292, |
|
"logits/real": 2.9574403762817383, |
|
"logps/generated": -503.462646484375, |
|
"logps/real": -1799.0537109375, |
|
"loss": 0.0181, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.270089149475098, |
|
"rewards/margins": 9.556356430053711, |
|
"rewards/real": 5.286266326904297, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.886845827439887e-07, |
|
"logits/generated": 3.9537768363952637, |
|
"logits/real": 2.8257808685302734, |
|
"logps/generated": -414.7232971191406, |
|
"logps/real": -1848.5228271484375, |
|
"loss": 0.0182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.797616958618164, |
|
"rewards/margins": 9.304213523864746, |
|
"rewards/real": 5.506596565246582, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.851485148514851e-07, |
|
"logits/generated": 3.7061352729797363, |
|
"logits/real": 2.828097105026245, |
|
"logps/generated": -478.8387145996094, |
|
"logps/real": -1567.3736572265625, |
|
"loss": 0.0162, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.287292003631592, |
|
"rewards/margins": 9.111185073852539, |
|
"rewards/real": 4.823891639709473, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.816124469589816e-07, |
|
"logits/generated": 3.8071022033691406, |
|
"logits/real": 2.786395788192749, |
|
"logps/generated": -497.02044677734375, |
|
"logps/real": -1951.7906494140625, |
|
"loss": 0.0116, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.036887168884277, |
|
"rewards/margins": 11.377471923828125, |
|
"rewards/real": 6.340585231781006, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.780763790664781e-07, |
|
"logits/generated": 3.5943808555603027, |
|
"logits/real": 3.0639989376068115, |
|
"logps/generated": -466.7726135253906, |
|
"logps/real": -1364.220458984375, |
|
"loss": 0.0138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.49011754989624, |
|
"rewards/margins": 9.089958190917969, |
|
"rewards/real": 4.599841594696045, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.745403111739745e-07, |
|
"logits/generated": 3.7227370738983154, |
|
"logits/real": 2.9175028800964355, |
|
"logps/generated": -474.84954833984375, |
|
"logps/real": -1725.422119140625, |
|
"loss": 0.0109, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.81961727142334, |
|
"rewards/margins": 10.596981048583984, |
|
"rewards/real": 5.777363300323486, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.7100424328147096e-07, |
|
"logits/generated": 3.338122844696045, |
|
"logits/real": 3.0875296592712402, |
|
"logps/generated": -435.6485290527344, |
|
"logps/real": -1685.8828125, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.066224098205566, |
|
"rewards/margins": 9.890088081359863, |
|
"rewards/real": 5.823863506317139, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.674681753889674e-07, |
|
"logits/generated": 3.593212127685547, |
|
"logits/real": 2.7413556575775146, |
|
"logps/generated": -402.1564025878906, |
|
"logps/real": -1605.6131591796875, |
|
"loss": 0.0143, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.504222869873047, |
|
"rewards/margins": 10.564704895019531, |
|
"rewards/real": 6.060481071472168, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.6393210749646387e-07, |
|
"logits/generated": 3.5109028816223145, |
|
"logits/real": 2.8503494262695312, |
|
"logps/generated": -526.8668823242188, |
|
"logps/real": -1490.2159423828125, |
|
"loss": 0.013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.489691257476807, |
|
"rewards/margins": 11.207723617553711, |
|
"rewards/real": 5.718031883239746, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 4.603960396039604e-07, |
|
"logits/generated": 3.4985408782958984, |
|
"logits/real": 2.8388476371765137, |
|
"logps/generated": -510.9580993652344, |
|
"logps/real": -1608.238525390625, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.455083847045898, |
|
"rewards/margins": 11.62474250793457, |
|
"rewards/real": 6.1696577072143555, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 4.5685997171145683e-07, |
|
"logits/generated": 3.408998966217041, |
|
"logits/real": 2.9803500175476074, |
|
"logps/generated": -467.8755798339844, |
|
"logps/real": -1577.332763671875, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.607177734375, |
|
"rewards/margins": 10.38925552368164, |
|
"rewards/real": 5.782077312469482, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 4.533239038189533e-07, |
|
"logits/generated": 3.7580649852752686, |
|
"logits/real": 3.202885866165161, |
|
"logps/generated": -476.220458984375, |
|
"logps/real": -1511.313720703125, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.54539155960083, |
|
"rewards/margins": 11.575006484985352, |
|
"rewards/real": 6.0296149253845215, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.4978783592644974e-07, |
|
"logits/generated": 3.447097063064575, |
|
"logits/real": 2.6315269470214844, |
|
"logps/generated": -475.2987365722656, |
|
"logps/real": -1564.4840087890625, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.546289443969727, |
|
"rewards/margins": 11.75828742980957, |
|
"rewards/real": 6.211997032165527, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 4.462517680339462e-07, |
|
"logits/generated": 3.4015917778015137, |
|
"logits/real": 2.9662914276123047, |
|
"logps/generated": -437.78826904296875, |
|
"logps/real": -1598.526123046875, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.93220329284668, |
|
"rewards/margins": 12.351015090942383, |
|
"rewards/real": 7.4188127517700195, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.427157001414427e-07, |
|
"logits/generated": 3.510097026824951, |
|
"logits/real": 2.913818836212158, |
|
"logps/generated": -456.273681640625, |
|
"logps/real": -1717.4222412109375, |
|
"loss": 0.0076, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.022789478302002, |
|
"rewards/margins": 13.268719673156738, |
|
"rewards/real": 8.245931625366211, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.3917963224893915e-07, |
|
"logits/generated": 3.4145877361297607, |
|
"logits/real": 2.8273959159851074, |
|
"logps/generated": -465.119873046875, |
|
"logps/real": -1708.9052734375, |
|
"loss": 0.0066, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.375365257263184, |
|
"rewards/margins": 12.598322868347168, |
|
"rewards/real": 7.222956657409668, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.356435643564356e-07, |
|
"logits/generated": 3.495450973510742, |
|
"logits/real": 2.7503676414489746, |
|
"logps/generated": -445.3560485839844, |
|
"logps/real": -1563.655517578125, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.360494136810303, |
|
"rewards/margins": 12.658773422241211, |
|
"rewards/real": 7.29827880859375, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 4.3210749646393206e-07, |
|
"logits/generated": 3.579958438873291, |
|
"logits/real": 2.7801036834716797, |
|
"logps/generated": -436.92364501953125, |
|
"logps/real": -1706.568359375, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.444735527038574, |
|
"rewards/margins": 12.98694133758545, |
|
"rewards/real": 7.5422043800354, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 4.285714285714285e-07, |
|
"logits/generated": 3.5498404502868652, |
|
"logits/real": 2.8857247829437256, |
|
"logps/generated": -467.14898681640625, |
|
"logps/real": -1606.5517578125, |
|
"loss": 0.0073, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -4.917501926422119, |
|
"rewards/margins": 12.757287979125977, |
|
"rewards/real": 7.839786529541016, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 4.25035360678925e-07, |
|
"logits/generated": 3.588599681854248, |
|
"logits/real": 2.7910194396972656, |
|
"logps/generated": -484.51446533203125, |
|
"logps/real": -1753.3662109375, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.856254577636719, |
|
"rewards/margins": 14.548685073852539, |
|
"rewards/real": 8.69243049621582, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.214992927864215e-07, |
|
"logits/generated": 3.4956557750701904, |
|
"logits/real": 2.6511971950531006, |
|
"logps/generated": -466.9720764160156, |
|
"logps/real": -1695.6744384765625, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.525470733642578, |
|
"rewards/margins": 14.2725191116333, |
|
"rewards/real": 8.747048377990723, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 4.1796322489391793e-07, |
|
"logits/generated": 3.2205729484558105, |
|
"logits/real": 2.746440887451172, |
|
"logps/generated": -445.98779296875, |
|
"logps/real": -1526.604736328125, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.600844383239746, |
|
"rewards/margins": 13.368339538574219, |
|
"rewards/real": 7.7674970626831055, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 4.144271570014144e-07, |
|
"logits/generated": 3.5135910511016846, |
|
"logits/real": 2.751537799835205, |
|
"logps/generated": -441.55523681640625, |
|
"logps/real": -1792.7021484375, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.988682746887207, |
|
"rewards/margins": 15.011758804321289, |
|
"rewards/real": 9.023075103759766, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 4.1089108910891084e-07, |
|
"logits/generated": 3.4969635009765625, |
|
"logits/real": 2.874459981918335, |
|
"logps/generated": -480.62884521484375, |
|
"logps/real": -1726.501953125, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.103398323059082, |
|
"rewards/margins": 14.921289443969727, |
|
"rewards/real": 8.817892074584961, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 4.0735502121640734e-07, |
|
"logits/generated": 3.732043743133545, |
|
"logits/real": 2.9122977256774902, |
|
"logps/generated": -471.2142639160156, |
|
"logps/real": -1475.9720458984375, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.9775614738464355, |
|
"rewards/margins": 14.218009948730469, |
|
"rewards/real": 8.240448951721191, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 4.038189533239038e-07, |
|
"logits/generated": 3.5188686847686768, |
|
"logits/real": 2.9900317192077637, |
|
"logps/generated": -560.73486328125, |
|
"logps/real": -1395.8021240234375, |
|
"loss": 0.02, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/generated": -7.657801628112793, |
|
"rewards/margins": 15.55529499053955, |
|
"rewards/real": 7.897493839263916, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 4.0028288543140025e-07, |
|
"logits/generated": 3.3781158924102783, |
|
"logits/real": 2.629373073577881, |
|
"logps/generated": -485.42156982421875, |
|
"logps/real": -1796.0904541015625, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.0816969871521, |
|
"rewards/margins": 16.453693389892578, |
|
"rewards/real": 9.371994972229004, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.967468175388967e-07, |
|
"logits/generated": 3.460228443145752, |
|
"logits/real": 2.9703357219696045, |
|
"logps/generated": -460.6011657714844, |
|
"logps/real": -1839.8109130859375, |
|
"loss": 0.0032, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -6.300644397735596, |
|
"rewards/margins": 15.726030349731445, |
|
"rewards/real": 9.425384521484375, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.9321074964639316e-07, |
|
"logits/generated": 3.0205512046813965, |
|
"logits/real": 2.8035550117492676, |
|
"logps/generated": -502.0563049316406, |
|
"logps/real": -1389.227783203125, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.311892032623291, |
|
"rewards/margins": 15.14806842803955, |
|
"rewards/real": 7.836176872253418, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.8967468175388967e-07, |
|
"logits/generated": 3.6069533824920654, |
|
"logits/real": 2.726623773574829, |
|
"logps/generated": -505.29437255859375, |
|
"logps/real": -1689.9912109375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.359295845031738, |
|
"rewards/margins": 16.689287185668945, |
|
"rewards/real": 9.32999324798584, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.861386138613861e-07, |
|
"logits/generated": 3.4885826110839844, |
|
"logits/real": 2.5398058891296387, |
|
"logps/generated": -468.04608154296875, |
|
"logps/real": -1690.434326171875, |
|
"loss": 0.0029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.317099571228027, |
|
"rewards/margins": 15.526530265808105, |
|
"rewards/real": 8.209431648254395, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.8260254596888257e-07, |
|
"logits/generated": 3.4325199127197266, |
|
"logits/real": 2.7468791007995605, |
|
"logps/generated": -485.3282165527344, |
|
"logps/real": -1564.2491455078125, |
|
"loss": 0.0038, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.328545570373535, |
|
"rewards/margins": 16.984691619873047, |
|
"rewards/real": 8.656146049499512, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.7906647807637903e-07, |
|
"logits/generated": 3.2969658374786377, |
|
"logits/real": 2.644705295562744, |
|
"logps/generated": -463.7992248535156, |
|
"logps/real": -1622.1402587890625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.6635894775390625, |
|
"rewards/margins": 16.660659790039062, |
|
"rewards/real": 8.9970703125, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.755304101838755e-07, |
|
"logits/generated": 3.4488253593444824, |
|
"logits/real": 2.9401419162750244, |
|
"logps/generated": -500.5553283691406, |
|
"logps/real": -1580.5606689453125, |
|
"loss": 0.0033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.708639621734619, |
|
"rewards/margins": 16.773143768310547, |
|
"rewards/real": 9.064504623413086, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.71994342291372e-07, |
|
"logits/generated": 3.3167099952697754, |
|
"logits/real": 3.012571334838867, |
|
"logps/generated": -521.168701171875, |
|
"logps/real": -1524.885986328125, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.435615539550781, |
|
"rewards/margins": 16.3592472076416, |
|
"rewards/real": 8.923630714416504, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 3.6845827439886844e-07, |
|
"logits/generated": 3.354684352874756, |
|
"logits/real": 2.7756412029266357, |
|
"logps/generated": -468.66552734375, |
|
"logps/real": -1526.357666015625, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.224092483520508, |
|
"rewards/margins": 16.959665298461914, |
|
"rewards/real": 8.735573768615723, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 3.649222065063649e-07, |
|
"logits/generated": 3.5977184772491455, |
|
"logits/real": 2.68579363822937, |
|
"logps/generated": -544.5694580078125, |
|
"logps/real": -1571.634033203125, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.363500595092773, |
|
"rewards/margins": 17.53289794921875, |
|
"rewards/real": 9.16939640045166, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 3.6138613861386135e-07, |
|
"logits/generated": 3.4918408393859863, |
|
"logits/real": 2.729196071624756, |
|
"logps/generated": -518.4525146484375, |
|
"logps/real": -1421.8359375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.855493545532227, |
|
"rewards/margins": 17.412639617919922, |
|
"rewards/real": 8.557147979736328, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 3.578500707213578e-07, |
|
"logits/generated": 3.2267754077911377, |
|
"logits/real": 2.8264071941375732, |
|
"logps/generated": -458.85308837890625, |
|
"logps/real": -1482.390625, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.917211055755615, |
|
"rewards/margins": 17.222782135009766, |
|
"rewards/real": 9.305570602416992, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.543140028288543e-07, |
|
"logits/generated": 3.449310302734375, |
|
"logits/real": 2.881521701812744, |
|
"logps/generated": -479.64202880859375, |
|
"logps/real": -1629.423095703125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.995371341705322, |
|
"rewards/margins": 18.3244686126709, |
|
"rewards/real": 10.329096794128418, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.5077793493635076e-07, |
|
"logits/generated": 3.4941279888153076, |
|
"logits/real": 2.9073030948638916, |
|
"logps/generated": -487.73846435546875, |
|
"logps/real": -1576.749267578125, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.9393134117126465, |
|
"rewards/margins": 17.776561737060547, |
|
"rewards/real": 9.837248802185059, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.472418670438472e-07, |
|
"logits/generated": 3.173529863357544, |
|
"logits/real": 2.8594255447387695, |
|
"logps/generated": -487.2096252441406, |
|
"logps/real": -1416.3604736328125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.219686031341553, |
|
"rewards/margins": 15.569984436035156, |
|
"rewards/real": 8.350296974182129, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 3.4370579915134367e-07, |
|
"logits/generated": 3.167581081390381, |
|
"logits/real": 2.622349500656128, |
|
"logps/generated": -450.83447265625, |
|
"logps/real": -1434.6671142578125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.358783721923828, |
|
"rewards/margins": 16.64669418334961, |
|
"rewards/real": 9.287908554077148, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 3.401697312588401e-07, |
|
"logits/generated": 3.5104973316192627, |
|
"logits/real": 2.580019474029541, |
|
"logps/generated": -495.33746337890625, |
|
"logps/real": -1505.913818359375, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.833502292633057, |
|
"rewards/margins": 16.95751953125, |
|
"rewards/real": 9.124017715454102, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 3.3663366336633663e-07, |
|
"logits/generated": 3.2511913776397705, |
|
"logits/real": 2.7187373638153076, |
|
"logps/generated": -495.783447265625, |
|
"logps/real": -1531.77294921875, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.153390884399414, |
|
"rewards/margins": 17.530384063720703, |
|
"rewards/real": 9.376993179321289, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 3.330975954738331e-07, |
|
"logits/generated": 3.2725577354431152, |
|
"logits/real": 2.7447569370269775, |
|
"logps/generated": -491.76397705078125, |
|
"logps/real": -1607.060791015625, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.7982048988342285, |
|
"rewards/margins": 17.868427276611328, |
|
"rewards/real": 10.07022476196289, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 3.2956152758132954e-07, |
|
"logits/generated": 3.2170231342315674, |
|
"logits/real": 2.760910749435425, |
|
"logps/generated": -479.82989501953125, |
|
"logps/real": -1328.6890869140625, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.747633934020996, |
|
"rewards/margins": 16.90505027770996, |
|
"rewards/real": 9.157418251037598, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.26025459688826e-07, |
|
"logits/generated": 3.2980682849884033, |
|
"logits/real": 2.6026058197021484, |
|
"logps/generated": -484.3726501464844, |
|
"logps/real": -1501.859619140625, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.929776668548584, |
|
"rewards/margins": 18.278278350830078, |
|
"rewards/real": 10.348502159118652, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.2248939179632245e-07, |
|
"logits/generated": 3.410261631011963, |
|
"logits/real": 2.8707950115203857, |
|
"logps/generated": -550.3458251953125, |
|
"logps/real": -1484.9326171875, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.744172096252441, |
|
"rewards/margins": 18.63943862915039, |
|
"rewards/real": 9.895263671875, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.1895332390381895e-07, |
|
"logits/generated": 3.624340772628784, |
|
"logits/real": 2.644595146179199, |
|
"logps/generated": -510.18096923828125, |
|
"logps/real": -1679.375, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.56155014038086, |
|
"rewards/margins": 20.045724868774414, |
|
"rewards/real": 11.484173774719238, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 3.154172560113154e-07, |
|
"logits/generated": 3.5800070762634277, |
|
"logits/real": 2.905097246170044, |
|
"logps/generated": -514.9693603515625, |
|
"logps/real": -1498.704345703125, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.549358367919922, |
|
"rewards/margins": 18.856287002563477, |
|
"rewards/real": 10.306925773620605, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 3.1188118811881186e-07, |
|
"logits/generated": 3.3421058654785156, |
|
"logits/real": 2.714266538619995, |
|
"logps/generated": -497.5679626464844, |
|
"logps/real": -1502.726806640625, |
|
"loss": 0.0023, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.259305953979492, |
|
"rewards/margins": 18.61314582824707, |
|
"rewards/real": 10.353840827941895, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 3.083451202263083e-07, |
|
"logits/generated": 3.3471736907958984, |
|
"logits/real": 2.7787163257598877, |
|
"logps/generated": -517.1281127929688, |
|
"logps/real": -1472.466064453125, |
|
"loss": 0.0027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.416834831237793, |
|
"rewards/margins": 18.77850914001465, |
|
"rewards/real": 10.361673355102539, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 3.0480905233380477e-07, |
|
"logits/generated": 3.2355308532714844, |
|
"logits/real": 2.852546215057373, |
|
"logps/generated": -500.0751953125, |
|
"logps/real": -1651.260986328125, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.010438919067383, |
|
"rewards/margins": 19.449283599853516, |
|
"rewards/real": 11.438843727111816, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 3.012729844413013e-07, |
|
"logits/generated": 3.437945604324341, |
|
"logits/real": 2.749911308288574, |
|
"logps/generated": -542.2654418945312, |
|
"logps/real": -1487.933837890625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.703554153442383, |
|
"rewards/margins": 19.010265350341797, |
|
"rewards/real": 10.306710243225098, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.9773691654879773e-07, |
|
"logits/generated": 3.234304428100586, |
|
"logits/real": 2.689356803894043, |
|
"logps/generated": -459.7379455566406, |
|
"logps/real": -1269.926025390625, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -7.5548601150512695, |
|
"rewards/margins": 16.455080032348633, |
|
"rewards/real": 8.900218963623047, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.942008486562942e-07, |
|
"logits/generated": 3.4382824897766113, |
|
"logits/real": 2.6454930305480957, |
|
"logps/generated": -517.6546630859375, |
|
"logps/real": -1617.267333984375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.587407112121582, |
|
"rewards/margins": 19.053064346313477, |
|
"rewards/real": 10.465655326843262, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.9066478076379064e-07, |
|
"logits/generated": 3.0672454833984375, |
|
"logits/real": 2.67156720161438, |
|
"logps/generated": -459.13262939453125, |
|
"logps/real": -1313.4677734375, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.137083053588867, |
|
"rewards/margins": 17.47877311706543, |
|
"rewards/real": 9.341691970825195, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.871287128712871e-07, |
|
"logits/generated": 3.4013969898223877, |
|
"logits/real": 2.417377233505249, |
|
"logps/generated": -492.2867736816406, |
|
"logps/real": -1638.200927734375, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.533792495727539, |
|
"rewards/margins": 19.663969039916992, |
|
"rewards/real": 11.13017749786377, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.835926449787836e-07, |
|
"logits/generated": 3.388955593109131, |
|
"logits/real": 2.9141921997070312, |
|
"logps/generated": -545.1387939453125, |
|
"logps/real": -1538.4222412109375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.785470962524414, |
|
"rewards/margins": 21.193279266357422, |
|
"rewards/real": 11.407808303833008, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.8005657708628005e-07, |
|
"logits/generated": 3.622591733932495, |
|
"logits/real": 2.7291853427886963, |
|
"logps/generated": -547.1531982421875, |
|
"logps/real": -1743.8941650390625, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.029909133911133, |
|
"rewards/margins": 21.701557159423828, |
|
"rewards/real": 11.671648025512695, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 2.765205091937765e-07, |
|
"logits/generated": 3.347827196121216, |
|
"logits/real": 2.813518524169922, |
|
"logps/generated": -518.5848388671875, |
|
"logps/real": -1608.0418701171875, |
|
"loss": 0.0024, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.312664985656738, |
|
"rewards/margins": 21.00381851196289, |
|
"rewards/real": 11.69115161895752, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 2.7298444130127296e-07, |
|
"logits/generated": 3.2987990379333496, |
|
"logits/real": 2.9453186988830566, |
|
"logps/generated": -522.5285034179688, |
|
"logps/real": -1608.2236328125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.82221794128418, |
|
"rewards/margins": 22.511926651000977, |
|
"rewards/real": 12.689709663391113, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 2.694483734087694e-07, |
|
"logits/generated": 3.4462966918945312, |
|
"logits/real": 2.625929117202759, |
|
"logps/generated": -545.0180053710938, |
|
"logps/real": -1427.6646728515625, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.479304313659668, |
|
"rewards/margins": 22.244796752929688, |
|
"rewards/real": 10.765493392944336, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 2.659123055162659e-07, |
|
"logits/generated": 3.2480366230010986, |
|
"logits/real": 2.8531947135925293, |
|
"logps/generated": -530.7342529296875, |
|
"logps/real": -1727.2086181640625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.427630424499512, |
|
"rewards/margins": 22.206254959106445, |
|
"rewards/real": 12.778624534606934, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 2.623762376237624e-07, |
|
"logits/generated": 3.5078930854797363, |
|
"logits/real": 2.7171075344085693, |
|
"logps/generated": -507.3907775878906, |
|
"logps/real": -1634.822998046875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.803173065185547, |
|
"rewards/margins": 20.158788681030273, |
|
"rewards/real": 11.355618476867676, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 2.5884016973125883e-07, |
|
"logits/generated": 3.389615535736084, |
|
"logits/real": 2.6584954261779785, |
|
"logps/generated": -519.1912841796875, |
|
"logps/real": -1511.683349609375, |
|
"loss": 0.0012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.256002426147461, |
|
"rewards/margins": 19.75589370727539, |
|
"rewards/real": 10.499893188476562, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 2.553041018387553e-07, |
|
"logits/generated": 3.1219124794006348, |
|
"logits/real": 2.69732928276062, |
|
"logps/generated": -513.461181640625, |
|
"logps/real": -1559.185302734375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.307373046875, |
|
"rewards/margins": 20.579065322875977, |
|
"rewards/real": 11.271692276000977, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 2.5176803394625174e-07, |
|
"logits/generated": 3.169861316680908, |
|
"logits/real": 2.5963492393493652, |
|
"logps/generated": -531.0164184570312, |
|
"logps/real": -1578.7210693359375, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.622026443481445, |
|
"rewards/margins": 21.500263214111328, |
|
"rewards/real": 11.878233909606934, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 2.482319660537482e-07, |
|
"logits/generated": 3.3711845874786377, |
|
"logits/real": 2.759648323059082, |
|
"logps/generated": -523.5821533203125, |
|
"logps/real": -1462.2935791015625, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.834948539733887, |
|
"rewards/margins": 21.79311180114746, |
|
"rewards/real": 10.958162307739258, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 2.446958981612447e-07, |
|
"logits/generated": 3.282954454421997, |
|
"logits/real": 2.7643918991088867, |
|
"logps/generated": -473.2938537597656, |
|
"logps/real": -1457.6839599609375, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.40355110168457, |
|
"rewards/margins": 19.281604766845703, |
|
"rewards/real": 10.878053665161133, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.4115983026874115e-07, |
|
"logits/generated": 3.4354591369628906, |
|
"logits/real": 2.6684250831604004, |
|
"logps/generated": -536.3674926757812, |
|
"logps/real": -1729.115966796875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.493001937866211, |
|
"rewards/margins": 23.19765853881836, |
|
"rewards/real": 12.704656600952148, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 2.376237623762376e-07, |
|
"logits/generated": 3.266200304031372, |
|
"logits/real": 2.749070644378662, |
|
"logps/generated": -506.13970947265625, |
|
"logps/real": -1660.871337890625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.553144454956055, |
|
"rewards/margins": 22.273033142089844, |
|
"rewards/real": 12.719888687133789, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 2.3408769448373408e-07, |
|
"logits/generated": 3.4035401344299316, |
|
"logits/real": 2.6919925212860107, |
|
"logps/generated": -545.0686645507812, |
|
"logps/real": -1454.680419921875, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.882238388061523, |
|
"rewards/margins": 21.769603729248047, |
|
"rewards/real": 11.88736629486084, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 2.3055162659123054e-07, |
|
"logits/generated": 3.244786024093628, |
|
"logits/real": 2.7183642387390137, |
|
"logps/generated": -538.6968383789062, |
|
"logps/real": -1382.13330078125, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.824942588806152, |
|
"rewards/margins": 22.102800369262695, |
|
"rewards/real": 11.277857780456543, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 2.2701555869872702e-07, |
|
"logits/generated": 3.3680167198181152, |
|
"logits/real": 2.5044057369232178, |
|
"logps/generated": -547.4840087890625, |
|
"logps/real": -1493.070556640625, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.53519344329834, |
|
"rewards/margins": 22.55548667907715, |
|
"rewards/real": 12.020292282104492, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 2.2347949080622347e-07, |
|
"logits/generated": 3.4719784259796143, |
|
"logits/real": 2.6997435092926025, |
|
"logps/generated": -531.0570678710938, |
|
"logps/real": -1509.5135498046875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.211065292358398, |
|
"rewards/margins": 22.259336471557617, |
|
"rewards/real": 12.048271179199219, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 2.1994342291371993e-07, |
|
"logits/generated": 3.3083972930908203, |
|
"logits/real": 2.4025731086730957, |
|
"logps/generated": -575.9044189453125, |
|
"logps/real": -1759.0048828125, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.274171829223633, |
|
"rewards/margins": 24.654094696044922, |
|
"rewards/real": 13.379923820495605, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 2.164073550212164e-07, |
|
"logits/generated": 3.4808990955352783, |
|
"logits/real": 2.577362060546875, |
|
"logps/generated": -463.35638427734375, |
|
"logps/real": -1399.489013671875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.753828048706055, |
|
"rewards/margins": 19.656505584716797, |
|
"rewards/real": 10.902679443359375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 2.1287128712871286e-07, |
|
"logits/generated": 3.4557316303253174, |
|
"logits/real": 2.766918659210205, |
|
"logps/generated": -553.6686401367188, |
|
"logps/real": -1662.569091796875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.936054229736328, |
|
"rewards/margins": 24.027223587036133, |
|
"rewards/real": 13.091169357299805, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 2.0933521923620934e-07, |
|
"logits/generated": 3.14534592628479, |
|
"logits/real": 2.6436448097229004, |
|
"logps/generated": -510.17950439453125, |
|
"logps/real": -1495.9129638671875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.579751968383789, |
|
"rewards/margins": 21.640422821044922, |
|
"rewards/real": 12.06067180633545, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 2.057991513437058e-07, |
|
"logits/generated": 3.3893933296203613, |
|
"logits/real": 2.515793800354004, |
|
"logps/generated": -507.2007751464844, |
|
"logps/real": -1553.794189453125, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.590211868286133, |
|
"rewards/margins": 23.068220138549805, |
|
"rewards/real": 12.478008270263672, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.0226308345120225e-07, |
|
"logits/generated": 3.168189525604248, |
|
"logits/real": 2.5998377799987793, |
|
"logps/generated": -535.4756469726562, |
|
"logps/real": -1227.1500244140625, |
|
"loss": 0.0011, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.283498764038086, |
|
"rewards/margins": 21.739755630493164, |
|
"rewards/real": 10.456255912780762, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.9872701555869873e-07, |
|
"logits/generated": 3.0489237308502197, |
|
"logits/real": 2.500216484069824, |
|
"logps/generated": -534.8651123046875, |
|
"logps/real": -1446.293701171875, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.704988479614258, |
|
"rewards/margins": 22.91129493713379, |
|
"rewards/real": 11.206305503845215, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.9519094766619518e-07, |
|
"logits/generated": 3.4261863231658936, |
|
"logits/real": 2.486532688140869, |
|
"logps/generated": -550.0924072265625, |
|
"logps/real": -1607.067138671875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.577852249145508, |
|
"rewards/margins": 24.368900299072266, |
|
"rewards/real": 12.791049003601074, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.9165487977369166e-07, |
|
"logits/generated": 3.2348227500915527, |
|
"logits/real": 2.7686595916748047, |
|
"logps/generated": -570.3921508789062, |
|
"logps/real": -1670.5628662109375, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.573652267456055, |
|
"rewards/margins": 25.506744384765625, |
|
"rewards/real": 13.93309211730957, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.8811881188118812e-07, |
|
"logits/generated": 3.115082263946533, |
|
"logits/real": 2.5799102783203125, |
|
"logps/generated": -503.86956787109375, |
|
"logps/real": -1525.8857421875, |
|
"loss": 0.0013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -9.889609336853027, |
|
"rewards/margins": 22.231243133544922, |
|
"rewards/real": 12.341632843017578, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.8458274398868457e-07, |
|
"logits/generated": 3.2662482261657715, |
|
"logits/real": 2.468737840652466, |
|
"logps/generated": -522.85498046875, |
|
"logps/real": -1517.8037109375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.054136276245117, |
|
"rewards/margins": 24.148630142211914, |
|
"rewards/real": 13.094491958618164, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.8104667609618105e-07, |
|
"logits/generated": 3.3418450355529785, |
|
"logits/real": 2.6237406730651855, |
|
"logps/generated": -586.31591796875, |
|
"logps/real": -1651.44921875, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.165428161621094, |
|
"rewards/margins": 27.307897567749023, |
|
"rewards/real": 14.142468452453613, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.775106082036775e-07, |
|
"logits/generated": 3.2586989402770996, |
|
"logits/real": 2.557404041290283, |
|
"logps/generated": -502.6537170410156, |
|
"logps/real": -1429.9771728515625, |
|
"loss": 0.0009, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.023027420043945, |
|
"rewards/margins": 23.140275955200195, |
|
"rewards/real": 12.117250442504883, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 1.7397454031117398e-07, |
|
"logits/generated": 3.426239013671875, |
|
"logits/real": 2.6774344444274902, |
|
"logps/generated": -572.1189575195312, |
|
"logps/real": -1397.4552001953125, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.265462875366211, |
|
"rewards/margins": 24.69881820678711, |
|
"rewards/real": 11.433354377746582, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 1.7043847241867044e-07, |
|
"logits/generated": 3.0871334075927734, |
|
"logits/real": 2.5882654190063477, |
|
"logps/generated": -550.581298828125, |
|
"logps/real": -1440.5426025390625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.109870910644531, |
|
"rewards/margins": 25.10569190979004, |
|
"rewards/real": 13.995819091796875, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.669024045261669e-07, |
|
"logits/generated": 3.4628639221191406, |
|
"logits/real": 2.6520979404449463, |
|
"logps/generated": -516.7633666992188, |
|
"logps/real": -1631.560302734375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.816521644592285, |
|
"rewards/margins": 25.77557373046875, |
|
"rewards/real": 13.959050178527832, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 1.6336633663366337e-07, |
|
"logits/generated": 3.325192928314209, |
|
"logits/real": 2.6561620235443115, |
|
"logps/generated": -579.8635864257812, |
|
"logps/real": -1503.974609375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.559514045715332, |
|
"rewards/margins": 25.418582916259766, |
|
"rewards/real": 12.85906982421875, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 1.5983026874115983e-07, |
|
"logits/generated": 3.2172133922576904, |
|
"logits/real": 2.644768238067627, |
|
"logps/generated": -523.1590576171875, |
|
"logps/real": -1488.0220947265625, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.404559135437012, |
|
"rewards/margins": 23.979520797729492, |
|
"rewards/real": 12.574960708618164, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.562942008486563e-07, |
|
"logits/generated": 3.0821359157562256, |
|
"logits/real": 2.5439810752868652, |
|
"logps/generated": -466.92694091796875, |
|
"logps/real": -1334.6812744140625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.233351707458496, |
|
"rewards/margins": 21.963369369506836, |
|
"rewards/real": 11.73001766204834, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.5275813295615276e-07, |
|
"logits/generated": 3.4721286296844482, |
|
"logits/real": 2.5629734992980957, |
|
"logps/generated": -569.9315795898438, |
|
"logps/real": -1553.0869140625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.754822731018066, |
|
"rewards/margins": 25.470844268798828, |
|
"rewards/real": 13.716024398803711, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 1.4922206506364921e-07, |
|
"logits/generated": 3.156252384185791, |
|
"logits/real": 2.703667163848877, |
|
"logps/generated": -512.1444091796875, |
|
"logps/real": -1513.850830078125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.257915496826172, |
|
"rewards/margins": 24.550378799438477, |
|
"rewards/real": 13.292462348937988, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 1.456859971711457e-07, |
|
"logits/generated": 3.141005277633667, |
|
"logits/real": 2.8431308269500732, |
|
"logps/generated": -560.7545776367188, |
|
"logps/real": -1514.60693359375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.668886184692383, |
|
"rewards/margins": 25.635141372680664, |
|
"rewards/real": 12.966257095336914, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 1.4214992927864215e-07, |
|
"logits/generated": 3.4051051139831543, |
|
"logits/real": 2.6105704307556152, |
|
"logps/generated": -574.8484497070312, |
|
"logps/real": -1481.771484375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.965988159179688, |
|
"rewards/margins": 26.073007583618164, |
|
"rewards/real": 13.107017517089844, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.3861386138613863e-07, |
|
"logits/generated": 3.417850971221924, |
|
"logits/real": 2.5431482791900635, |
|
"logps/generated": -557.5526123046875, |
|
"logps/real": -1832.751953125, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.886221885681152, |
|
"rewards/margins": 26.28768539428711, |
|
"rewards/real": 14.401464462280273, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"learning_rate": 1.3507779349363508e-07, |
|
"logits/generated": 3.3368937969207764, |
|
"logits/real": 2.492691993713379, |
|
"logps/generated": -503.24652099609375, |
|
"logps/real": -1523.6234130859375, |
|
"loss": 0.0007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.509819030761719, |
|
"rewards/margins": 25.515331268310547, |
|
"rewards/real": 14.005514144897461, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 1.3154172560113154e-07, |
|
"logits/generated": 3.381847858428955, |
|
"logits/real": 2.6287689208984375, |
|
"logps/generated": -530.4554443359375, |
|
"logps/real": -1445.392333984375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.088003158569336, |
|
"rewards/margins": 25.268281936645508, |
|
"rewards/real": 13.180277824401855, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.2800565770862802e-07, |
|
"logits/generated": 3.0044195652008057, |
|
"logits/real": 2.6941545009613037, |
|
"logps/generated": -580.5851440429688, |
|
"logps/real": -1566.2730712890625, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.416638374328613, |
|
"rewards/margins": 27.365131378173828, |
|
"rewards/real": 13.948491096496582, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 1.2446958981612447e-07, |
|
"logits/generated": 3.0417532920837402, |
|
"logits/real": 2.775116443634033, |
|
"logps/generated": -564.0277099609375, |
|
"logps/real": -1237.641357421875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.711797714233398, |
|
"rewards/margins": 24.17923355102539, |
|
"rewards/real": 11.467435836791992, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 1.2093352192362092e-07, |
|
"logits/generated": 3.1363422870635986, |
|
"logits/real": 2.7018938064575195, |
|
"logps/generated": -546.12109375, |
|
"logps/real": -1475.1849365234375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.732572555541992, |
|
"rewards/margins": 26.642467498779297, |
|
"rewards/real": 13.90989875793457, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.1739745403111739e-07, |
|
"logits/generated": 3.2991225719451904, |
|
"logits/real": 2.5671286582946777, |
|
"logps/generated": -463.2415466308594, |
|
"logps/real": -1702.0286865234375, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -11.021214485168457, |
|
"rewards/margins": 26.277912139892578, |
|
"rewards/real": 15.256696701049805, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.1386138613861386e-07, |
|
"logits/generated": 3.336590528488159, |
|
"logits/real": 2.4211459159851074, |
|
"logps/generated": -574.0828857421875, |
|
"logps/real": -1439.0128173828125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.705076217651367, |
|
"rewards/margins": 25.831045150756836, |
|
"rewards/real": 12.125969886779785, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 1.1032531824611033e-07, |
|
"logits/generated": 3.1198062896728516, |
|
"logits/real": 2.5011868476867676, |
|
"logps/generated": -568.57177734375, |
|
"logps/real": -1326.2716064453125, |
|
"loss": 0.0006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.998083114624023, |
|
"rewards/margins": 25.015640258789062, |
|
"rewards/real": 12.017557144165039, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.0678925035360678e-07, |
|
"logits/generated": 3.2028965950012207, |
|
"logits/real": 2.5122649669647217, |
|
"logps/generated": -561.6862182617188, |
|
"logps/real": -1607.349365234375, |
|
"loss": 0.0005, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.135000228881836, |
|
"rewards/margins": 26.576717376708984, |
|
"rewards/real": 13.441716194152832, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.0325318246110325e-07, |
|
"logits/generated": 3.216439723968506, |
|
"logits/real": 2.6768908500671387, |
|
"logps/generated": -613.3177490234375, |
|
"logps/real": -1402.431396484375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.124387741088867, |
|
"rewards/margins": 28.185840606689453, |
|
"rewards/real": 13.061452865600586, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 9.971711456859971e-08, |
|
"logits/generated": 3.2150936126708984, |
|
"logits/real": 2.443791627883911, |
|
"logps/generated": -570.3302001953125, |
|
"logps/real": -1579.4129638671875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.96008586883545, |
|
"rewards/margins": 27.931583404541016, |
|
"rewards/real": 13.97149658203125, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"learning_rate": 9.618104667609618e-08, |
|
"logits/generated": 3.172091007232666, |
|
"logits/real": 2.476423978805542, |
|
"logps/generated": -550.3682250976562, |
|
"logps/real": -1797.6273193359375, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.748133659362793, |
|
"rewards/margins": 27.63075828552246, |
|
"rewards/real": 13.882623672485352, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 9.264497878359263e-08, |
|
"logits/generated": 3.2214691638946533, |
|
"logits/real": 2.508018732070923, |
|
"logps/generated": -520.4338989257812, |
|
"logps/real": -1491.5899658203125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.596437454223633, |
|
"rewards/margins": 27.078861236572266, |
|
"rewards/real": 14.482423782348633, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 8.91089108910891e-08, |
|
"logits/generated": 3.090144634246826, |
|
"logits/real": 2.64501953125, |
|
"logps/generated": -575.699462890625, |
|
"logps/real": -1449.6285400390625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.906789779663086, |
|
"rewards/margins": 27.012332916259766, |
|
"rewards/real": 13.105542182922363, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 8.557284299858557e-08, |
|
"logits/generated": 3.216085910797119, |
|
"logits/real": 2.628757953643799, |
|
"logps/generated": -563.4234619140625, |
|
"logps/real": -1506.6536865234375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.473469734191895, |
|
"rewards/margins": 27.185466766357422, |
|
"rewards/real": 12.711997032165527, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 8.203677510608204e-08, |
|
"logits/generated": 3.2252001762390137, |
|
"logits/real": 2.5635406970977783, |
|
"logps/generated": -554.8922119140625, |
|
"logps/real": -1379.961669921875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.215415954589844, |
|
"rewards/margins": 24.81317138671875, |
|
"rewards/real": 11.597757339477539, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 7.85007072135785e-08, |
|
"logits/generated": 3.1512575149536133, |
|
"logits/real": 2.3962998390197754, |
|
"logps/generated": -574.1869506835938, |
|
"logps/real": -1498.155517578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.913461685180664, |
|
"rewards/margins": 27.563989639282227, |
|
"rewards/real": 13.650527954101562, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 7.496463932107496e-08, |
|
"logits/generated": 3.3186583518981934, |
|
"logits/real": 2.5883545875549316, |
|
"logps/generated": -550.5670166015625, |
|
"logps/real": -1544.77392578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.482427597045898, |
|
"rewards/margins": 27.15085220336914, |
|
"rewards/real": 14.668424606323242, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 7.142857142857142e-08, |
|
"logits/generated": 3.263967990875244, |
|
"logits/real": 2.536264181137085, |
|
"logps/generated": -589.7340698242188, |
|
"logps/real": -1500.179931640625, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.068285942077637, |
|
"rewards/margins": 28.84073829650879, |
|
"rewards/real": 13.772455215454102, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 6.789250353606789e-08, |
|
"logits/generated": 3.497816801071167, |
|
"logits/real": 2.428816318511963, |
|
"logps/generated": -596.6787109375, |
|
"logps/real": -1555.1085205078125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.438451766967773, |
|
"rewards/margins": 29.081518173217773, |
|
"rewards/real": 13.643072128295898, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 6.435643564356436e-08, |
|
"logits/generated": 3.010061264038086, |
|
"logits/real": 2.4792978763580322, |
|
"logps/generated": -526.48046875, |
|
"logps/real": -1130.676513671875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.853174209594727, |
|
"rewards/margins": 25.5134334564209, |
|
"rewards/real": 11.660261154174805, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 5.33, |
|
"learning_rate": 6.082036775106081e-08, |
|
"logits/generated": 3.1093571186065674, |
|
"logits/real": 2.4732513427734375, |
|
"logps/generated": -529.5757446289062, |
|
"logps/real": -1343.6026611328125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.401544570922852, |
|
"rewards/margins": 25.137725830078125, |
|
"rewards/real": 12.736178398132324, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 5.7284299858557285e-08, |
|
"logits/generated": 3.3876266479492188, |
|
"logits/real": 2.5899083614349365, |
|
"logps/generated": -588.9549560546875, |
|
"logps/real": -1509.0943603515625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.996371269226074, |
|
"rewards/margins": 27.71066665649414, |
|
"rewards/real": 13.714296340942383, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 5.374823196605375e-08, |
|
"logits/generated": 3.112064838409424, |
|
"logits/real": 2.6190438270568848, |
|
"logps/generated": -551.2916870117188, |
|
"logps/real": -1541.237060546875, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.962061882019043, |
|
"rewards/margins": 27.94354248046875, |
|
"rewards/real": 13.981483459472656, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 5.0212164073550206e-08, |
|
"logits/generated": 3.4540042877197266, |
|
"logits/real": 2.519503116607666, |
|
"logps/generated": -547.7754516601562, |
|
"logps/real": -1539.6217041015625, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.80566692352295, |
|
"rewards/margins": 27.109554290771484, |
|
"rewards/real": 13.303888320922852, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"learning_rate": 4.667609618104667e-08, |
|
"logits/generated": 3.2564361095428467, |
|
"logits/real": 2.5632336139678955, |
|
"logps/generated": -564.44287109375, |
|
"logps/real": -1403.699951171875, |
|
"loss": 0.0004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.87768268585205, |
|
"rewards/margins": 27.802310943603516, |
|
"rewards/real": 12.924627304077148, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 4.3140028288543134e-08, |
|
"logits/generated": 3.235797166824341, |
|
"logits/real": 2.559129238128662, |
|
"logps/generated": -535.4729614257812, |
|
"logps/real": -1371.961669921875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.979475021362305, |
|
"rewards/margins": 26.749774932861328, |
|
"rewards/real": 12.770299911499023, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 3.96039603960396e-08, |
|
"logits/generated": 3.105957508087158, |
|
"logits/real": 2.7094485759735107, |
|
"logps/generated": -624.28662109375, |
|
"logps/real": -1619.6658935546875, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -16.26133155822754, |
|
"rewards/margins": 30.236867904663086, |
|
"rewards/real": 13.97553825378418, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 3.606789250353607e-08, |
|
"logits/generated": 3.269968032836914, |
|
"logits/real": 2.525156021118164, |
|
"logps/generated": -547.2183837890625, |
|
"logps/real": -1683.659423828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.063116073608398, |
|
"rewards/margins": 28.569652557373047, |
|
"rewards/real": 15.506538391113281, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 5.64, |
|
"learning_rate": 3.253182461103253e-08, |
|
"logits/generated": 3.100898265838623, |
|
"logits/real": 2.5007834434509277, |
|
"logps/generated": -537.1228637695312, |
|
"logps/real": -1622.945556640625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.297723770141602, |
|
"rewards/margins": 27.18996810913086, |
|
"rewards/real": 13.892248153686523, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 2.8995756718528992e-08, |
|
"logits/generated": 3.1697795391082764, |
|
"logits/real": 2.515049457550049, |
|
"logps/generated": -528.9276733398438, |
|
"logps/real": -1365.165771484375, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.908604621887207, |
|
"rewards/margins": 25.516712188720703, |
|
"rewards/real": 12.608107566833496, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 2.545968882602546e-08, |
|
"logits/generated": 3.247465133666992, |
|
"logits/real": 2.738851308822632, |
|
"logps/generated": -526.6806030273438, |
|
"logps/real": -1680.171142578125, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -13.784246444702148, |
|
"rewards/margins": 29.31841468811035, |
|
"rewards/real": 15.53416633605957, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.75, |
|
"learning_rate": 2.1923620933521923e-08, |
|
"logits/generated": 3.180821180343628, |
|
"logits/real": 2.666006565093994, |
|
"logps/generated": -635.9178466796875, |
|
"logps/real": -1368.9986572265625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.650575637817383, |
|
"rewards/margins": 27.650531768798828, |
|
"rewards/real": 12.999956130981445, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 1.8387553041018386e-08, |
|
"logits/generated": 3.0839998722076416, |
|
"logits/real": 2.581054210662842, |
|
"logps/generated": -544.0230712890625, |
|
"logps/real": -1359.1251220703125, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.829536437988281, |
|
"rewards/margins": 26.41360855102539, |
|
"rewards/real": 13.584071159362793, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 1.485148514851485e-08, |
|
"logits/generated": 3.3319427967071533, |
|
"logits/real": 2.6179628372192383, |
|
"logps/generated": -590.0933837890625, |
|
"logps/real": -1592.672119140625, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.341830253601074, |
|
"rewards/margins": 29.359546661376953, |
|
"rewards/real": 15.017718315124512, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"learning_rate": 1.1315417256011316e-08, |
|
"logits/generated": 3.4769108295440674, |
|
"logits/real": 2.5944294929504395, |
|
"logps/generated": -614.6587524414062, |
|
"logps/real": -1540.0328369140625, |
|
"loss": 0.0003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.357019424438477, |
|
"rewards/margins": 28.344806671142578, |
|
"rewards/real": 12.987787246704102, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 7.77934936350778e-09, |
|
"logits/generated": 3.231849193572998, |
|
"logits/real": 2.5611472129821777, |
|
"logps/generated": -572.6622314453125, |
|
"logps/real": -1665.4876708984375, |
|
"loss": 0.0002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.289937019348145, |
|
"rewards/margins": 29.341079711914062, |
|
"rewards/real": 15.051142692565918, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 4.243281471004243e-09, |
|
"logits/generated": 3.283064603805542, |
|
"logits/real": 2.623944044113159, |
|
"logps/generated": -635.8428955078125, |
|
"logps/real": -1552.221923828125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -15.60615348815918, |
|
"rewards/margins": 30.579345703125, |
|
"rewards/real": 14.973190307617188, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"learning_rate": 7.072135785007072e-10, |
|
"logits/generated": 3.2523193359375, |
|
"logits/real": 2.4982213973999023, |
|
"logps/generated": -530.0173950195312, |
|
"logps/real": -1398.9669189453125, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -12.438911437988281, |
|
"rewards/margins": 25.966604232788086, |
|
"rewards/real": 13.527694702148438, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"step": 1572, |
|
"total_flos": 0.0, |
|
"train_loss": 0.018845485343664946, |
|
"train_runtime": 21431.5998, |
|
"train_samples_per_second": 4.703, |
|
"train_steps_per_second": 0.073 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1572, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|